Repository: qodo-ai/pr-agent
Branch: main
Commit: aaf8fbe21836
Files: 216
Total size: 1.5 MB

Directory structure:
gitextract_ey74c0jr/

├── .dockerignore
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug-report.yml
│   │   ├── config.yml
│   │   ├── feature-request.yml
│   │   └── miscellaneous.yml
│   └── workflows/
│       ├── build-and-test.yaml
│       ├── code_coverage.yaml
│       ├── docs-ci.yaml
│       ├── e2e_tests.yaml
│       ├── pr-agent-review.yaml
│       └── pre-commit.yml
├── .gitignore
├── .pr_agent.toml
├── .pre-commit-config.yaml
├── AGENTS.md
├── CHANGELOG.md
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── Dockerfile.github_action
├── Dockerfile.github_action_dockerhub
├── LICENSE
├── MANIFEST.in
├── README.md
├── RELEASE_NOTES.md
├── SECURITY.md
├── action.yaml
├── codecov.yml
├── docker/
│   ├── Dockerfile
│   └── Dockerfile.lambda
├── docs/
│   ├── README.md
│   ├── docs/
│   │   ├── .gitbook.yaml
│   │   ├── CNAME
│   │   ├── core-abilities/
│   │   │   ├── compression_strategy.md
│   │   │   ├── dynamic_context.md
│   │   │   ├── fetching_ticket_context.md
│   │   │   ├── index.md
│   │   │   ├── interactivity.md
│   │   │   ├── metadata.md
│   │   │   └── self_reflection.md
│   │   ├── css/
│   │   │   └── custom.css
│   │   ├── faq/
│   │   │   └── index.md
│   │   ├── index.md
│   │   ├── installation/
│   │   │   ├── azure.md
│   │   │   ├── bitbucket.md
│   │   │   ├── gitea.md
│   │   │   ├── github.md
│   │   │   ├── gitlab.md
│   │   │   ├── index.md
│   │   │   ├── locally.md
│   │   │   └── pr_agent.md
│   │   ├── overview/
│   │   │   └── data_privacy.md
│   │   ├── summary.md
│   │   ├── tools/
│   │   │   ├── add_docs.md
│   │   │   ├── ask.md
│   │   │   ├── describe.md
│   │   │   ├── generate_labels.md
│   │   │   ├── help.md
│   │   │   ├── help_docs.md
│   │   │   ├── improve.md
│   │   │   ├── index.md
│   │   │   ├── review.md
│   │   │   ├── similar_issues.md
│   │   │   └── update_changelog.md
│   │   └── usage-guide/
│   │       ├── EXAMPLE_BEST_PRACTICE.md
│   │       ├── additional_configurations.md
│   │       ├── automations_and_usage.md
│   │       ├── changing_a_model.md
│   │       ├── configuration_options.md
│   │       ├── index.md
│   │       ├── introduction.md
│   │       └── mail_notifications.md
│   ├── mkdocs.yml
│   └── overrides/
│       ├── main.html
│       └── partials/
│           ├── footer.html
│           └── integrations/
│               └── analytics/
│                   └── custom.html
├── github_action/
│   └── entrypoint.sh
├── pr_agent/
│   ├── __init__.py
│   ├── agent/
│   │   ├── __init__.py
│   │   └── pr_agent.py
│   ├── algo/
│   │   ├── __init__.py
│   │   ├── ai_handlers/
│   │   │   ├── base_ai_handler.py
│   │   │   ├── langchain_ai_handler.py
│   │   │   ├── litellm_ai_handler.py
│   │   │   ├── litellm_helpers.py
│   │   │   └── openai_ai_handler.py
│   │   ├── cli_args.py
│   │   ├── file_filter.py
│   │   ├── git_patch_processing.py
│   │   ├── language_handler.py
│   │   ├── pr_processing.py
│   │   ├── token_handler.py
│   │   ├── types.py
│   │   └── utils.py
│   ├── cli.py
│   ├── cli_pip.py
│   ├── config_loader.py
│   ├── custom_merge_loader.py
│   ├── git_providers/
│   │   ├── __init__.py
│   │   ├── azuredevops_provider.py
│   │   ├── bitbucket_provider.py
│   │   ├── bitbucket_server_provider.py
│   │   ├── codecommit_client.py
│   │   ├── codecommit_provider.py
│   │   ├── gerrit_provider.py
│   │   ├── git_provider.py
│   │   ├── gitea_provider.py
│   │   ├── github_provider.py
│   │   ├── gitlab_provider.py
│   │   ├── local_git_provider.py
│   │   └── utils.py
│   ├── identity_providers/
│   │   ├── __init__.py
│   │   ├── default_identity_provider.py
│   │   └── identity_provider.py
│   ├── log/
│   │   └── __init__.py
│   ├── secret_providers/
│   │   ├── __init__.py
│   │   ├── aws_secrets_manager_provider.py
│   │   ├── google_cloud_storage_secret_provider.py
│   │   └── secret_provider.py
│   ├── servers/
│   │   ├── __init__.py
│   │   ├── atlassian-connect-qodo-merge.json
│   │   ├── atlassian-connect.json
│   │   ├── azuredevops_server_webhook.py
│   │   ├── bitbucket_app.py
│   │   ├── bitbucket_server_webhook.py
│   │   ├── gerrit_server.py
│   │   ├── gitea_app.py
│   │   ├── github_action_runner.py
│   │   ├── github_app.py
│   │   ├── github_lambda_webhook.py
│   │   ├── github_polling.py
│   │   ├── gitlab_lambda_webhook.py
│   │   ├── gitlab_webhook.py
│   │   ├── gunicorn_config.py
│   │   ├── help.py
│   │   └── utils.py
│   ├── settings/
│   │   ├── .secrets_template.toml
│   │   ├── code_suggestions/
│   │   │   ├── pr_code_suggestions_prompts.toml
│   │   │   ├── pr_code_suggestions_prompts_not_decoupled.toml
│   │   │   └── pr_code_suggestions_reflect_prompts.toml
│   │   ├── configuration.toml
│   │   ├── custom_labels.toml
│   │   ├── generated_code_ignore.toml
│   │   ├── ignore.toml
│   │   ├── language_extensions.toml
│   │   ├── pr_add_docs.toml
│   │   ├── pr_custom_labels.toml
│   │   ├── pr_description_prompts.toml
│   │   ├── pr_evaluate_prompt_response.toml
│   │   ├── pr_help_docs_headings_prompts.toml
│   │   ├── pr_help_docs_prompts.toml
│   │   ├── pr_help_prompts.toml
│   │   ├── pr_information_from_user_prompts.toml
│   │   ├── pr_line_questions_prompts.toml
│   │   ├── pr_questions_prompts.toml
│   │   ├── pr_reviewer_prompts.toml
│   │   └── pr_update_changelog_prompts.toml
│   └── tools/
│       ├── __init__.py
│       ├── pr_add_docs.py
│       ├── pr_code_suggestions.py
│       ├── pr_config.py
│       ├── pr_description.py
│       ├── pr_generate_labels.py
│       ├── pr_help_docs.py
│       ├── pr_help_message.py
│       ├── pr_line_questions.py
│       ├── pr_questions.py
│       ├── pr_reviewer.py
│       ├── pr_similar_issue.py
│       ├── pr_update_changelog.py
│       └── ticket_pr_compliance_check.py
├── pr_compliance_checklist.yaml
├── pyproject.toml
├── requirements-dev.txt
├── requirements.txt
├── setup.py
└── tests/
    ├── e2e_tests/
    │   ├── e2e_utils.py
    │   ├── langchain_ai_handler.py
    │   ├── test_bitbucket_app.py
    │   ├── test_gitea_app.py
    │   ├── test_github_app.py
    │   └── test_gitlab_webhook.py
    ├── health_test/
    │   └── main.py
    └── unittest/
        ├── test_add_docs_trigger.py
        ├── test_aws_secrets_manager_provider.py
        ├── test_azure_devops_comment.py
        ├── test_azure_devops_parsing.py
        ├── test_bitbucket_provider.py
        ├── test_clip_tokens.py
        ├── test_codecommit_client.py
        ├── test_codecommit_provider.py
        ├── test_config_loader_secrets.py
        ├── test_convert_to_markdown.py
        ├── test_delete_hunks.py
        ├── test_extend_patch.py
        ├── test_extract_issue_from_branch.py
        ├── test_fetching_sub_issues.py
        ├── test_file_filter.py
        ├── test_find_line_number_of_relevant_line_in_file.py
        ├── test_fix_json_escape_char.py
        ├── test_fix_output.py
        ├── test_fresh_vars_functionality.py
        ├── test_get_max_tokens.py
        ├── test_gitea_provider.py
        ├── test_github_action_output.py
        ├── test_gitlab_provider.py
        ├── test_gitlab_webhook_port.py
        ├── test_handle_patch_deletions.py
        ├── test_ignore_repositories.py
        ├── test_language_handler.py
        ├── test_litellm_reasoning_effort.py
        ├── test_load_yaml.py
        ├── test_parse_code_suggestion.py
        ├── test_pr_update_changelog.py
        ├── test_secret_provider_factory.py
        ├── test_similar_issue_non_github.py
        └── test_try_fix_yaml.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .dockerignore
================================================
.venv/
venv/
pr_agent/settings/.secrets.toml
pics/
pr_agent.egg-info/
build/


================================================
FILE: .github/ISSUE_TEMPLATE/bug-report.yml
================================================
name: "\U0001FAB2 Bug Report"
description: Submit a bug report
labels: ["bug"]
body:

  - type: dropdown
    id: information-git-provider
    attributes:
      label: Git provider
      description: 'The problem arises when using:'
      options:
        - "Github Cloud"
        - "Github Enterprise"
        - "Gitlab"
        - "Bitbucket Cloud"
        - "Bitbucket Server"
        - "Azure"
        - "Other"
    validations:
      required: true

  - type: textarea
    id: system-info
    attributes:
      label: System Info
      description: Please share your system info with us.
      placeholder: model used, deployment type (action/app/cli/...), etc...
    validations:
      required: true

  - type: textarea
    id: bug-details
    attributes:
      label: Bug details
      description: Please describe the problem.
      placeholder: Describe the problem
    validations:
      required: true


================================================
FILE: .github/ISSUE_TEMPLATE/config.yml
================================================
blank_issues_enabled: false
version: 0.1
contact_links:
  - name: Discussions
    url: https://github.com/qodo-ai/pr-agent/discussions
    about: GitHub Discussions

  - name: Discord community
    url: https://discord.com/channels/1057273017547378788/1126104260430528613
    about: Join our discord community


================================================
FILE: .github/ISSUE_TEMPLATE/feature-request.yml
================================================
name: "\U0001F4A1 Feature request"
description: Submit a proposal/request for a new PR-Agent feature
labels: ["feature"]
body:
  - type: textarea
    id: feature-request
    validations:
      required: true
    attributes:
      label: Feature request
      description: |
        Description of the feature proposal.

  - type: textarea
    id: motivation
    validations:
      required: true
    attributes:
      label: Motivation
      description: |
        Outline the motivation for the proposal.


================================================
FILE: .github/ISSUE_TEMPLATE/miscellaneous.yml
================================================
name: "❔ General Issue"
description: Submit a general issue
labels: ["general"]
body:

  - type: dropdown
    id: information-git-provider
    attributes:
      label: Git provider (optional)
      description: 'Git Provider:'
      options:
        - "Github Cloud"
        - "Github Enterprise"
        - "Gitlab"
        - "Bitbucket Cloud"
        - "Bitbucket Server"
        - "Azure"
        - "Other"

  - type: textarea
    id: system-info
    attributes:
      label: System Info (optional)
      description: Please share your system info with us.
      placeholder: model used, deployment type (action/app/cli/...), etc...
    validations:
      required: false

  - type: textarea
    id: issues-details
    attributes:
      label: Issues details
      description: Please share the issues details.
      placeholder: Describe the issue
    validations:
      required: true


================================================
FILE: .github/workflows/build-and-test.yaml
================================================
name: Build-and-test

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  build-and-test:
    runs-on: ubuntu-latest

    steps:
      - id: checkout
        uses: actions/checkout@v6

      - id: dockerx
        name: Setup Docker Buildx
        uses: docker/setup-buildx-action@v3

      - id: build
        name: Build dev docker
        uses: docker/build-push-action@v6
        with:
          context: .
          file: ./docker/Dockerfile
          push: false
          load: true
          tags: codiumai/pr-agent:test
          cache-from: type=gha,scope=dev
          cache-to: type=gha,mode=max,scope=dev
          target: test

      - id: test
        name: Test dev docker
        run: |
          docker run --rm codiumai/pr-agent:test pytest -v tests/unittest


================================================
FILE: .github/workflows/code_coverage.yaml
================================================
name: Code-coverage

on:
  workflow_dispatch:
  # push:
  #   branches:
  #     - main
  pull_request:
    branches:
      - main

jobs:
  build-and-test:
    runs-on: ubuntu-latest

    steps:
      - id: checkout
        uses: actions/checkout@v6

      - id: dockerx
        name: Setup Docker Buildx
        uses: docker/setup-buildx-action@v3

      - id: build
        name: Build dev docker
        uses: docker/build-push-action@v6
        with:
          context: .
          file: ./docker/Dockerfile
          push: false
          load: true
          tags: codiumai/pr-agent:test
          cache-from: type=gha,scope=dev
          cache-to: type=gha,mode=max,scope=dev
          target: test

      - id: code_cov
        name: Test dev docker
        run: |
          docker run --name test_container codiumai/pr-agent:test  pytest  tests/unittest --cov=pr_agent --cov-report term --cov-report xml:coverage.xml
          docker cp test_container:/app/coverage.xml coverage.xml
          docker rm test_container

      - name: Validate coverage report
        run: |
          if [ ! -f coverage.xml ]; then
            echo "Coverage report not found"
            exit 1
          fi
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}


================================================
FILE: .github/workflows/docs-ci.yaml
================================================
name: docs-ci
on:
  push:
    branches:
      - main
      - add-docs-portal
    paths:
      - docs/**
permissions:
  contents: write
jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
      - name: Configure Git Credentials
        run: |
          git config user.name github-actions[bot]
          git config user.email 41898282+github-actions[bot]@users.noreply.github.com
      - uses: actions/setup-python@v5
        with:
          python-version: 3.x
      - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
      - uses: actions/cache@v4
        with:
          key: mkdocs-material-${{ env.cache_id }}
          path: .cache
          restore-keys: |
            mkdocs-material-
      - run: pip install mkdocs-material
      - run: pip install "mkdocs-material[imaging]"
      - run: pip install mkdocs-glightbox
      - run: mkdocs gh-deploy -f docs/mkdocs.yml --force


================================================
FILE: .github/workflows/e2e_tests.yaml
================================================
name: PR-Agent E2E tests

on:
  workflow_dispatch:
#  schedule:
#    - cron: '0 0 * * *' # This cron expression runs the workflow every night at midnight UTC

jobs:
  pr_agent_job:
    runs-on: ubuntu-latest
    name: PR-Agent E2E GitHub App Test
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6

      - name: Setup Docker Buildx
        uses: docker/setup-buildx-action@v3

      - id: build
        name: Build dev docker
        uses: docker/build-push-action@v6
        with:
          context: .
          file: ./docker/Dockerfile
          push: false
          load: true
          tags: codiumai/pr-agent:test
          cache-from: type=gha,scope=dev
          cache-to: type=gha,mode=max,scope=dev
          target: test

      - id: test1
        name: E2E test github app
        run: |
          docker run -e GITHUB.USER_TOKEN=${{ secrets.TOKEN_GITHUB }} --rm codiumai/pr-agent:test pytest -v tests/e2e_tests/test_github_app.py

      - id: test2
        name: E2E gitlab webhook
        run: |
          docker run -e gitlab.PERSONAL_ACCESS_TOKEN=${{ secrets.TOKEN_GITLAB }} --rm codiumai/pr-agent:test pytest -v tests/e2e_tests/test_gitlab_webhook.py

      - id: test3
        name: E2E bitbucket app
        run: |
          docker run -e BITBUCKET.USERNAME=${{ secrets.BITBUCKET_USERNAME }}  -e BITBUCKET.PASSWORD=${{ secrets.BITBUCKET_PASSWORD }} --rm codiumai/pr-agent:test pytest -v tests/e2e_tests/test_bitbucket_app.py


================================================
FILE: .github/workflows/pr-agent-review.yaml
================================================
# This workflow enables developers to call PR-Agent's `/[actions]` commands in PR comments and upon PR creation.
# Learn more at https://www.codium.ai/pr-agent/
# This is v0.2 of this workflow file

name: PR-Agent

on:
# pull_request:
# issue_comment:
  workflow_dispatch:

permissions:
  issues: write
  pull-requests: write

jobs:
  pr_agent_job:
    runs-on: ubuntu-latest
    name: Run pr agent on every pull request
    steps:
      - name: PR Agent action step
        id: pragent
        uses: Codium-ai/pr-agent@main
        env:
          OPENAI_KEY: ${{ secrets.OPENAI_KEY }}
          OPENAI_ORG: ${{ secrets.OPENAI_ORG }} # optional
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          PINECONE.API_KEY: ${{ secrets.PINECONE_API_KEY }}
          PINECONE.ENVIRONMENT: ${{ secrets.PINECONE_ENVIRONMENT }}
          GITHUB_ACTION_CONFIG.AUTO_DESCRIBE: true
          GITHUB_ACTION_CONFIG.AUTO_REVIEW: true
          GITHUB_ACTION_CONFIG.AUTO_IMPROVE: true


================================================
FILE: .github/workflows/pre-commit.yml
================================================
# disabled. We might run it manually if needed.
name: pre-commit

on:
  workflow_dispatch:
#  pull_request:
#  push:
#    branches: [main]

jobs:
  pre-commit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v6
      - uses: actions/setup-python@v5
      # SEE https://github.com/pre-commit/action
      - uses: pre-commit/action@v3.0.1


================================================
FILE: .gitignore
================================================
.idea/
.lsp/
.vscode/
.env
.venv/
venv/
pr_agent/settings/.secrets.toml
__pycache__
dist/
*.egg-info/
build/
.DS_Store
docs/.cache/
.qodo
poetry.lock


================================================
FILE: .pr_agent.toml
================================================
[pr_reviewer]
enable_review_labels_effort = true
enable_auto_approval = true

[github_app]
pr_commands = [
    "/describe --pr_description.publish_description_as_comment=true",
    "/improve",
    "/agentic_review"
]

handle_push_trigger = true
push_commands = [
    "/improve",
    "/agentic_review"
]

[review_agent]
enabled = true
publish_output = true


================================================
FILE: .pre-commit-config.yaml
================================================
# See https://pre-commit.com for more information
# See https://pre-commit.com/hooks.html for more hooks

default_language_version:
  python: python3

repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      - id: check-added-large-files
      - id: check-toml
      - id: check-yaml
      - id: end-of-file-fixer
      - id: trailing-whitespace
  # - repo: https://github.com/rhysd/actionlint
  #   rev: v1.7.3
  #   hooks:
  #     - id: actionlint
  - repo: https://github.com/pycqa/isort
    # rev must match what's in requirements-dev.txt
    rev: 5.13.2
    hooks:
      - id: isort
  # - repo: https://github.com/PyCQA/bandit
  #   rev: 1.7.10
  #   hooks:
  #     - id: bandit
  #       args: [
  #         "-c", "pyproject.toml",
  #       ]
  # - repo: https://github.com/astral-sh/ruff-pre-commit
  #   rev: v0.7.1
  #   hooks:
  #     - id: ruff
  #       args:
  #         - --fix
  #     - id: ruff-format
  # -   repo: https://github.com/PyCQA/autoflake
  #     rev: v2.3.1
  #     hooks:
  #     -   id: autoflake
  #         args:
  #           - --in-place
  #           - --remove-all-unused-imports
  #           - --remove-unused-variables


================================================
FILE: AGENTS.md
================================================
# Repository Guidelines

## Dos and Don’ts

- **Do** match the interpreter requirement declared in `pyproject.toml` (Python ≥ 3.12) and install `requirements.txt` plus `requirements-dev.txt` before running tools.
- **Do** run tests with `PYTHONPATH=.` set to keep imports functional (for example `PYTHONPATH=. ./.venv/bin/pytest tests/unittest/test_fix_json_escape_char.py -q`).
- **Do** adjust configuration through `.pr_agent.toml` or files under `pr_agent/settings/` instead of hard-coding values.
- **Don’t** commit secrets or access tokens; rely on environment variables as shown in the health and e2e tests.
- **Don’t** reformat or reorder files globally; match existing 120-character lines, import ordering, and docstring style.
- **Don’t** delete or rename configuration, prompt, or workflow files without maintainer approval.

## Project Structure and Module Organization

PR-Agent automates AI-assisted reviews for pull requests across multiple git providers.

- `pr_agent/agent/` orchestrates commands (`review`, `describe`, `improve`, etc.) via `pr_agent/agent/pr_agent.py`.
- `pr_agent/tools/` implements individual capabilities such as reviewers, code suggestions, docs updates, and label generation.
- `pr_agent/git_providers/`, `pr_agent/identity_providers/`, and `pr_agent/secret_providers/` handle integrations with GitHub, GitLab, Bitbucket, Azure DevOps, and secret stores.
- `pr_agent/settings/` stores Dynaconf defaults (prompts, configuration templates, ignore lists) respected at runtime; `.pr_agent.toml` overrides repository-level behavior.
- `tests/unittest/`, `tests/e2e_tests/`, and `tests/health_test/` contain pytest-based unit, end-to-end, and smoke checks.
- `docs/` holds the MkDocs site (`docs/mkdocs.yml` plus content under `docs/docs/`); overrides live in `docs/overrides/`.
- `.github/workflows/` defines CI pipelines for unit tests, coverage, docs deployment, pre-commit, and PR-agent self-review.
- `docker/` and the root Dockerfiles provide build targets for services (`github_app`, `gitlab_webhook`, etc.) and the `test` stage used in CI.

## Build, Test, and Development Commands

- Create or activate a virtual environment, then install runtime dependencies with `pip install -r requirements.txt`; add development tooling via `pip install -r requirements-dev.txt`.
- Run a single unit test (verified): `PYTHONPATH=. ./.venv/bin/pytest tests/unittest/test_fix_json_escape_char.py -q`.
- Run the full unit suite: `PYTHONPATH=. ./.venv/bin/pytest tests/unittest -v`.
- Execute the CLI locally once dependencies and API keys are available: `python -m pr_agent.cli --pr_url <https://host/org/repo/pull/123> review`.
- Build the same `test` Docker target that CI uses when containerizing: `docker build -f docker/Dockerfile --target test .` (loads dev dependencies and copies `tests/`).
- Generate and deploy documentation with MkDocs after installing the same extras as CI (`mkdocs-material`, `mkdocs-glightbox`): `mkdocs serve -f docs/mkdocs.yml` for previews and `mkdocs gh-deploy -f docs/mkdocs.yml` for publication.

## Coding Style and Naming Conventions

- Python sources follow the Ruff configuration in `pyproject.toml` (`line-length = 120`, Pyflakes plus `flake8-bugbear` checks, and isort ordering). Keep imports grouped as isort would produce and prefer double quotes for strings.
- Pre-commit (`.pre-commit-config.yaml`) enforces trailing whitespace cleanup, final newlines, TOML/YAML validity, and optional `isort`; run `pre-commit run --all-files` before submitting patches if installed.
- Match existing docstring and comment style—concise English comments using imperative phrasing only where necessary.
- Configuration files in `pr_agent/settings/` are TOML; preserve formatting, section order, and comments when editing prompts or defaults.
- Markdown in `docs/` uses MkDocs conventions (YAML front matter absent; rely on heading hierarchy already in place).

## Testing Guidelines

- Pytest is the standard framework; keep new tests under the closest matching directory (`tests/unittest/` for unit logic, `tests/e2e_tests/` for integration flows, `tests/health_test/` for smoke coverage).
- Prefer focused unit tests that isolate helpers in `pr_agent/algo/`, `pr_agent/tools/`, or provider adapters; use parameterized tests where existing files already do so.
- Set `PYTHONPATH=.` when invoking pytest from the repository root to avoid import errors.
- End-to-end suites require provider tokens (`TOKEN_GITHUB`, `TOKEN_GITLAB`, `BITBUCKET_USERNAME`, `BITBUCKET_PASSWORD`) and may take several minutes; run them only when credentials and sandboxes are configured.
- The health test (`tests/health_test/main.py`) exercises `/describe`, `/review`, and `/improve`; update expected artifacts if prompts change meaningfully.

## Commit and Pull Request Guidelines

- Follow `CONTRIBUTING.md`: keep changes focused, add or update tests, and use Conventional Commit-style messages (e.g., `fix: handle missing repo settings gracefully`).
- Target branch names follow `feature/<name>` or `fix/<issue>` patterns for substantial work.
- Reference related issues and update README or docs when user-facing behavior shifts.
- Ensure CI workflows (`build-and-test`, `code-coverage`, `docs-ci`) succeed locally or in draft PRs before requesting review; reproduce failures with the documented commands above.
- Include screenshots or terminal captures when modifying user-visible output or documentation previews.

## Safety and Permissions

- Ask for confirmation before adding dependencies, renaming files, or changing workflow definitions; many consumers embed these paths and prompts.
- Stay within existing formatting and directory conventions—avoid mass refactors, re-sorting of prompts, or reformatting Markdown beyond the touched sections.
- You may read files, list directories, and run targeted lint/test/doc commands without prior approval; coordinate before launching full Docker builds or e2e suites that rely on external credentials.
- Never commit cached credentials, API keys, or coverage artifacts; CI already handles secrets through GitHub Actions.
- Treat prompt and configuration files as single sources of truth—update mirrors (`.pr_agent.toml`, `pr_agent/settings/*.toml`) together when behavior changes.

## Security and Configuration Tips

- Secrets should be supplied through environment variables (see usages in `tests/e2e_tests/test_github_app.py` and `tests/health_test/main.py`); do not persist them in code or configuration files.
- Adjust runtime behavior by overriding keys in `.pr_agent.toml` or by supplying repository-specific Dynaconf files; keep overrides minimal and documented inside the PR description.
- Review `SECURITY.md` before disclosing vulnerabilities and follow its contact instructions for responsible reporting.


================================================
FILE: CHANGELOG.md
================================================
## 2023-08-03

### Optimized

- Optimized PR diff processing by introducing caching for diff files, reducing the number of API calls.
- Refactored `load_large_diff` function to generate a patch only when necessary.
- Fixed a bug in the GitLab provider where the new file was not retrieved correctly.

## 2023-08-02

### Enhanced

- Updated several tools in the `pr_agent` package to use commit messages in their functionality.
- Commit messages are now retrieved and stored in the `vars` dictionary for each tool.
- Added a section to display the commit messages in the prompts of various tools.

## 2023-08-01

### Enhanced

- Introduced the ability to retrieve commit messages from pull requests across different git providers.
- Implemented commit messages retrieval for GitHub and GitLab providers.
- Updated the PR description template to include a section for commit messages if they exist.
- Added support for repository-specific configuration files (.pr_agent.yaml) for the PR Agent.
- Implemented this feature for both GitHub and GitLab providers.
- Added a new configuration option 'use_repo_settings_file' to enable or disable the use of a repo-specific settings file.

## 2023-07-30

### Enhanced

- Added the ability to modify any configuration parameter from 'configuration.toml' on-the-fly.
- Updated the command line interface and bot commands to accept configuration changes as arguments.
- Improved the PR agent to handle additional arguments for each action.

## 2023-07-28

### Improved

- Enhanced error handling and logging in the GitLab provider.
- Improved handling of inline comments and code suggestions in GitLab.
- Fixed a bug where an additional unneeded line was added to code suggestions in GitLab.

## 2023-07-26

### Added

- New feature for updating the CHANGELOG.md based on the contents of a PR.
- Added support for this feature for the GitHub provider.
- New configuration settings and prompts for the changelog update feature.


================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Contributor Code of Conduct

As contributors and maintainers of this project, and in the interest of fostering an open
and welcoming community, we pledge to respect all people who contribute through reporting
issues, posting feature requests, updating documentation, submitting pull requests or
patches, and other activities.

We are committed to making participation in this project a harassment-free experience for
everyone, regardless of level of experience, gender, gender identity and expression,
sexual orientation, disability, personal appearance, body size, race, ethnicity, age,
religion, or nationality.

Examples of unacceptable behavior by participants include:

* The use of sexualized language or imagery
* Personal attacks
* Trolling or insulting/derogatory comments
* Public or private harassment
* Publishing others' private information, such as physical or electronic addresses,
  without explicit permission
* Other unethical or unprofessional conduct

Project maintainers have the right and responsibility to remove, edit, or reject comments,
commits, code, wiki edits, issues, and other contributions that are not aligned to this
Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors
that they deem inappropriate, threatening, offensive, or harmful.

By adopting this Code of Conduct, project maintainers commit themselves to fairly and
consistently applying these principles to every aspect of managing this project. Project
maintainers who do not follow or enforce the Code of Conduct may be permanently removed
from the project team.

This Code of Conduct applies both within project spaces and in public spaces when an
individual is representing the project or its community.

Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by
contacting a project maintainer at dana.f@qodo.ai. All complaints will
be reviewed and investigated and will result in a response that is deemed necessary and
appropriate to the circumstances. Maintainers are obligated to maintain confidentiality
with regard to the reporter of an incident.

This Code of Conduct is adapted from the
[Contributor Covenant](https://contributor-covenant.org), version 1.3.0, available at
[contributor-covenant.org/version/1/3/0/](https://contributor-covenant.org/version/1/3/0/)


================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to PR-Agent

Thank you for your interest in contributing to the PR-Agent project!

## Getting Started

1. Fork the repository and clone your fork
2. Install Python 3.10 or higher
3. Install dependencies from `requirements.txt` and `requirements-dev.txt` (for example, `pip install -r requirements.txt -r requirements-dev.txt`)
4. Create a new branch for your contribution:
   - For new features: `git checkout -b feature/your-feature-name`
   - For bug fixes: `git checkout -b fix/issue-description`
5. Make your changes
6. Write or update tests as needed
7. Run tests locally to ensure everything passes
8. Commit your changes using conventional commit messages
9. Push to your fork and submit a pull request

## Development Guidelines

- Keep pull requests focused on a single feature or fix
- Follow the existing code style and formatting conventions
- Add unit tests for any new functionality using pytest
- Ensure test coverage for your changes
- Update documentation as needed

## Pull Request Process

1. Ensure your PR includes a clear description of the changes
2. Link any related issues
3. Update the README.md if needed
4. Wait for review from maintainers

## Questions or Need Help?

- Join our [Discord community](https://discord.com/channels/1057273017547378788/1126104260430528613) for questions and discussions
- Check the [documentation](https://qodo-merge-docs.qodo.ai/) for detailed information
- Report bugs or request features through [GitHub Issues](https://github.com/qodo-ai/pr-agent/issues)


================================================
FILE: Dockerfile.github_action
================================================
# Image whose container entrypoint is the github_action/entrypoint.sh script.
FROM python:3.12.10-slim AS base

# Install git and curl, then remove the apt caches in the same layer so they
# do not add to the image size.
RUN apt-get update && apt-get install --no-install-recommends -y git curl && apt-get clean && rm -rf /var/lib/apt/lists/*

WORKDIR /app
# COPY rather than ADD: these are plain local files, so ADD's implicit archive
# extraction and remote-URL handling are not wanted.
COPY pyproject.toml .
COPY requirements.txt .
# Install from the project metadata, then delete the metadata files from the image.
# The pr_agent sources are copied afterwards and found through PYTHONPATH.
RUN pip install --no-cache-dir . && rm pyproject.toml requirements.txt
ENV PYTHONPATH=/app
COPY docs docs
COPY pr_agent pr_agent
COPY github_action/entrypoint.sh /
# Make sure the script is executable regardless of its mode in the build context.
RUN chmod +x /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]


================================================
FILE: Dockerfile.github_action_dockerhub
================================================
# Reuses the already-built codiumai/pr-agent image (tag: github_action) as-is;
# no further build instructions are applied on top of it.
FROM codiumai/pr-agent:github_action


================================================
FILE: LICENSE
================================================
                    GNU AFFERO GENERAL PUBLIC LICENSE
                       Version 3, 19 November 2007

 Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.

                            Preamble

  The GNU Affero General Public License is a free, copyleft license for
software and other kinds of works, specifically designed to ensure
cooperation with the community in the case of network server software.

  The licenses for most software and other practical works are designed
to take away your freedom to share and change the works.  By contrast,
our General Public Licenses are intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users.

  When we speak of free software, we are referring to freedom, not
price.  Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.

  Developers that use our General Public Licenses protect your rights
with two steps: (1) assert copyright on the software, and (2) offer
you this License which gives you legal permission to copy, distribute
and/or modify the software.

  A secondary benefit of defending all users' freedom is that
improvements made in alternate versions of the program, if they
receive widespread use, become available for other developers to
incorporate.  Many developers of free software are heartened and
encouraged by the resulting cooperation.  However, in the case of
software used on network servers, this result may fail to come about.
The GNU General Public License permits making a modified version and
letting the public access it on a server without ever releasing its
source code to the public.

  The GNU Affero General Public License is designed specifically to
ensure that, in such cases, the modified source code becomes available
to the community.  It requires the operator of a network server to
provide the source code of the modified version running there to the
users of that server.  Therefore, public use of a modified version, on
a publicly accessible server, gives the public access to the source
code of the modified version.

  An older license, called the Affero General Public License and
published by Affero, was designed to accomplish similar goals.  This is
a different license, not a version of the Affero GPL, but Affero has
released a new version of the Affero GPL which permits relicensing under
this license.

  The precise terms and conditions for copying, distribution and
modification follow.

                       TERMS AND CONDITIONS

  0. Definitions.

  "This License" refers to version 3 of the GNU Affero General Public License.

  "Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.

  "The Program" refers to any copyrightable work licensed under this
License.  Each licensee is addressed as "you".  "Licensees" and
"recipients" may be individuals or organizations.

  To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy.  The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.

  A "covered work" means either the unmodified Program or a work based
on the Program.

  To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy.  Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.

  To "convey" a work means any kind of propagation that enables other
parties to make or receive copies.  Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.

  An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License.  If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.

  1. Source Code.

  The "source code" for a work means the preferred form of the work
for making modifications to it.  "Object code" means any non-source
form of a work.

  A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.

  The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form.  A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.

  The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities.  However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work.  For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.

  The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.

  The Corresponding Source for a work in source code form is that
same work.

  2. Basic Permissions.

  All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met.  This License explicitly affirms your unlimited
permission to run the unmodified Program.  The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work.  This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.

  You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force.  You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright.  Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.

  Conveying under any other circumstances is permitted solely under
the conditions stated below.  Sublicensing is not allowed; section 10
makes it unnecessary.

  3. Protecting Users' Legal Rights From Anti-Circumvention Law.

  No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.

  When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.

  4. Conveying Verbatim Copies.

  You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.

  You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.

  5. Conveying Modified Source Versions.

  You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:

    a) The work must carry prominent notices stating that you modified
    it, and giving a relevant date.

    b) The work must carry prominent notices stating that it is
    released under this License and any conditions added under section
    7.  This requirement modifies the requirement in section 4 to
    "keep intact all notices".

    c) You must license the entire work, as a whole, under this
    License to anyone who comes into possession of a copy.  This
    License will therefore apply, along with any applicable section 7
    additional terms, to the whole of the work, and all its parts,
    regardless of how they are packaged.  This License gives no
    permission to license the work in any other way, but it does not
    invalidate such permission if you have separately received it.

    d) If the work has interactive user interfaces, each must display
    Appropriate Legal Notices; however, if the Program has interactive
    interfaces that do not display Appropriate Legal Notices, your
    work need not make them do so.

  A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit.  Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.

  6. Conveying Non-Source Forms.

  You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:

    a) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by the
    Corresponding Source fixed on a durable physical medium
    customarily used for software interchange.

    b) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by a
    written offer, valid for at least three years and valid for as
    long as you offer spare parts or customer support for that product
    model, to give anyone who possesses the object code either (1) a
    copy of the Corresponding Source for all the software in the
    product that is covered by this License, on a durable physical
    medium customarily used for software interchange, for a price no
    more than your reasonable cost of physically performing this
    conveying of source, or (2) access to copy the
    Corresponding Source from a network server at no charge.

    c) Convey individual copies of the object code with a copy of the
    written offer to provide the Corresponding Source.  This
    alternative is allowed only occasionally and noncommercially, and
    only if you received the object code with such an offer, in accord
    with subsection 6b.

    d) Convey the object code by offering access from a designated
    place (gratis or for a charge), and offer equivalent access to the
    Corresponding Source in the same way through the same place at no
    further charge.  You need not require recipients to copy the
    Corresponding Source along with the object code.  If the place to
    copy the object code is a network server, the Corresponding Source
    may be on a different server (operated by you or a third party)
    that supports equivalent copying facilities, provided you maintain
    clear directions next to the object code saying where to find the
    Corresponding Source.  Regardless of what server hosts the
    Corresponding Source, you remain obligated to ensure that it is
    available for as long as needed to satisfy these requirements.

    e) Convey the object code using peer-to-peer transmission, provided
    you inform other peers where the object code and Corresponding
    Source of the work are being offered to the general public at no
    charge under subsection 6d.

  A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.

  A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling.  In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage.  For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product.  A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.

  "Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source.  The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.

  If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information.  But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).

  The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed.  Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.

  Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.

  7. Additional Terms.

  "Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law.  If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.

  When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it.  (Additional permissions may be written to require their own
removal in certain cases when you modify the work.)  You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.

  Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:

    a) Disclaiming warranty or limiting liability differently from the
    terms of sections 15 and 16 of this License; or

    b) Requiring preservation of specified reasonable legal notices or
    author attributions in that material or in the Appropriate Legal
    Notices displayed by works containing it; or

    c) Prohibiting misrepresentation of the origin of that material, or
    requiring that modified versions of such material be marked in
    reasonable ways as different from the original version; or

    d) Limiting the use for publicity purposes of names of licensors or
    authors of the material; or

    e) Declining to grant rights under trademark law for use of some
    trade names, trademarks, or service marks; or

    f) Requiring indemnification of licensors and authors of that
    material by anyone who conveys the material (or modified versions of
    it) with contractual assumptions of liability to the recipient, for
    any liability that these contractual assumptions directly impose on
    those licensors and authors.

  All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10.  If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term.  If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.

  If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.

  Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.

  8. Termination.

  You may not propagate or modify a covered work except as expressly
provided under this License.  Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).

  However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.

  Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.

  Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License.  If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.

  9. Acceptance Not Required for Having Copies.

  You are not required to accept this License in order to receive or
run a copy of the Program.  Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance.  However,
nothing other than this License grants you permission to propagate or
modify any covered work.  These actions infringe copyright if you do
not accept this License.  Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.

  10. Automatic Licensing of Downstream Recipients.

  Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License.  You are not responsible
for enforcing compliance by third parties with this License.

  An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations.  If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.

  You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License.  For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.

  11. Patents.

  A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based.  The
work thus licensed is called the contributor's "contributor version".

  A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version.  For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.

  Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.

  In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement).  To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.

  If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients.  "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.

  If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.

  A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License.  You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.

  Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.

  12. No Surrender of Others' Freedom.

  If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License.  If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all.  For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.

  13. Remote Network Interaction; Use with the GNU General Public License.

  Notwithstanding any other provision of this License, if you modify the
Program, your modified version must prominently offer all users
interacting with it remotely through a computer network (if your version
supports such interaction) an opportunity to receive the Corresponding
Source of your version by providing access to the Corresponding Source
from a network server at no charge, through some standard or customary
means of facilitating copying of software.  This Corresponding Source
shall include the Corresponding Source for any work covered by version 3
of the GNU General Public License that is incorporated pursuant to the
following paragraph.

  Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU General Public License into a single
combined work, and to convey the resulting work.  The terms of this
License will continue to apply to the part which is the covered work,
but the work with which it is combined will remain governed by version
3 of the GNU General Public License.

  14. Revised Versions of this License.

  The Free Software Foundation may publish revised and/or new versions of
the GNU Affero General Public License from time to time.  Such new versions
will be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.

  Each version is given a distinguishing version number.  If the
Program specifies that a certain numbered version of the GNU Affero General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation.  If the Program does not specify a version number of the
GNU Affero General Public License, you may choose any version ever published
by the Free Software Foundation.

  If the Program specifies that a proxy can decide which future
versions of the GNU Affero General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.

  Later license versions may give you additional or different
permissions.  However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.

  15. Disclaimer of Warranty.

  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.

  16. Limitation of Liability.

  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.

  17. Interpretation of Sections 15 and 16.

  If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.

                     END OF TERMS AND CONDITIONS

            How to Apply These Terms to Your New Programs

  If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.

  To do so, attach the following notices to the program.  It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year>  <name of author>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published
    by the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.

Also add information on how to contact you by electronic and paper mail.

  If your software can interact with users remotely through a computer
network, you should also make sure that it provides a way for users to
get its source.  For example, if your program is a web application, its
interface could display a "Source" link that leads users to an archive
of the code.  There are many ways you could offer source, and different
solutions will be better for different programs; see section 13 for the
specific requirements.

  You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU AGPL, see
<https://www.gnu.org/licenses/>.


================================================
FILE: MANIFEST.in
================================================
# Include every TOML file under pr_agent, but leave out anything matching *.secrets.toml.
recursive-include pr_agent *.toml
recursive-exclude pr_agent *.secrets.toml


================================================
FILE: README.md
================================================
<a href="https://github.com/Codium-ai/pr-agent/commits/main">
<img alt="GitHub" src="https://img.shields.io/github/last-commit/Codium-ai/pr-agent/main?style=for-the-badge" height="20">
</a>

<br />

# 🚀 PR Agent - The Original Open-Source PR Reviewer.

 This repository contains the open-source PR Agent Project. 
 It is not the Qodo free tier.
 
Try the free version on our website.

👉[Get Started Now](https://www.qodo.ai/get-started/)

PR-Agent is an open-source, AI-powered code review agent and a community-maintained legacy project of Qodo. It is distinct from Qodo’s primary AI code review offering, which provides a feature-rich, context-aware experience. Qodo now offers a free tier that integrates seamlessly with GitHub, GitLab, Bitbucket, and Azure DevOps for high-quality automated reviews.

## Table of Contents

- [Getting Started](#getting-started)
- [Why Use PR-Agent?](#why-use-pr-agent)
- [Features](#features)
- [See It in Action](#see-it-in-action)
- [Try It Now](#try-it-now)
- [How It Works](#how-it-works)
- [Data Privacy](#data-privacy)
- [Contributing](#contributing)

## Getting Started

### 🚀 Quick Start for PR-Agent

#### 1. Try it Instantly (No Setup)
Test PR-Agent on any public GitHub repository by commenting `@CodiumAI-Agent /improve`

#### 2. GitHub Action (Recommended)
Add automated PR reviews to your repository with a simple workflow file:
```yaml
# .github/workflows/pr-agent.yml
name: PR Agent
on:
  pull_request:
    types: [opened, synchronize]
jobs:
  pr_agent_job:
    runs-on: ubuntu-latest
    steps:
    - name: PR Agent action step
      uses: Codium-ai/pr-agent@main
      env:
        OPENAI_KEY: ${{ secrets.OPENAI_KEY }}
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
```
[Full GitHub Action setup guide](https://qodo-merge-docs.qodo.ai/installation/github/#run-as-a-github-action)

#### 3. CLI Usage (Local Development)
Run PR-Agent locally on your repository:
```bash
pip install pr-agent
export OPENAI_KEY=your_key_here
pr-agent --pr_url https://github.com/owner/repo/pull/123 review
```
[Complete CLI setup guide](https://qodo-merge-docs.qodo.ai/usage-guide/automations_and_usage/#local-repo-cli)

#### 4. Other Platforms
- [GitLab webhook setup](https://qodo-merge-docs.qodo.ai/installation/gitlab/)
- [BitBucket app installation](https://qodo-merge-docs.qodo.ai/installation/bitbucket/)
- [Azure DevOps setup](https://qodo-merge-docs.qodo.ai/installation/azure/)

[//]: # (## News and Updates)

[//]: # ()
[//]: # (## Aug 8, 2025)

[//]: # ()
[//]: # ()
[//]: # ()
[//]: # (## Jul 17, 2025)

[//]: # ()
[//]: # (Introducing `/compliance`, a new Qodo Merge 💎 tool that runs comprehensive checks for security, ticket requirements, codebase duplication, and custom organizational rules. )

[//]: # ()
[//]: # (<img width="384" alt="compliance-image" src="https://codium.ai/images/pr_agent/compliance_partial.png"/>)

[//]: # ()
[//]: # (Read more about it [here]&#40;https://qodo-merge-docs.qodo.ai/tools/compliance/&#41;)

[//]: # ()
[//]: # ()
[//]: # (## Jul 1, 2025)

[//]: # (You can now receive automatic feedback from Qodo Merge in your local IDE after each commit. Read more about it [here]&#40;https://github.com/qodo-ai/agents/tree/main/agents/qodo-merge-post-commit&#41;.)

[//]: # ()
[//]: # ()
[//]: # (## Jun 21, 2025)

[//]: # ()
[//]: # (v0.30 was [released]&#40;https://github.com/qodo-ai/pr-agent/releases&#41;)

[//]: # ()
[//]: # ()
[//]: # (## Jun 3, 2025)

[//]: # ()
[//]: # (Qodo Merge now offers a simplified free tier 💎.)

[//]: # (Organizations can use Qodo Merge at no cost, with a [monthly limit]&#40;https://qodo-merge-docs.qodo.ai/installation/qodo_merge/#cloud-users&#41; of 75 PR reviews per organization.)

[//]: # ()
[//]: # ()
[//]: # (## Apr 30, 2025)

[//]: # ()
[//]: # (A new feature is now available in the `/improve` tool for Qodo Merge 💎 - Chat on code suggestions.)

[//]: # ()
[//]: # (<img width="512" alt="image" src="https://codium.ai/images/pr_agent/improve_chat_on_code_suggestions_ask.png" />)

[//]: # ()
[//]: # (Read more about it [here]&#40;https://qodo-merge-docs.qodo.ai/tools/improve/#chat-on-code-suggestions&#41;.)

[//]: # ()
[//]: # ()
[//]: # (## Apr 16, 2025)

[//]: # ()
[//]: # (New tool for Qodo Merge 💎 - `/scan_repo_discussions`.)

[//]: # ()
[//]: # (<img width="635" alt="image" src="https://codium.ai/images/pr_agent/scan_repo_discussions_2.png" />)

[//]: # ()
[//]: # (Read more about it [here]&#40;https://qodo-merge-docs.qodo.ai/tools/scan_repo_discussions/&#41;.)

## Why Use PR-Agent?

### 🎯 Built for Real Development Teams

**Fast & Affordable**: Each tool (`/review`, `/improve`, `/ask`) uses a single LLM call (~30 seconds, low cost)

**Handles Any PR Size**: Our [PR Compression strategy](https://qodo-merge-docs.qodo.ai/core-abilities/#pr-compression-strategy) effectively processes both small and large PRs

**Highly Customizable**: JSON-based prompting allows easy customization of review categories and behavior via [configuration files](pr_agent/settings/configuration.toml)

**Platform Agnostic**: 
- **Git Providers**: GitHub, GitLab, BitBucket, Azure DevOps, Gitea
- **Deployment**: CLI, GitHub Actions, Docker, self-hosted, webhooks
- **AI Models**: OpenAI GPT, Claude, Deepseek, and more

**Open Source Benefits**:
- Full control over your data and infrastructure
- Customize prompts and behavior for your team's needs
- No vendor lock-in
- Community-driven development

## Features

<div style="text-align:left;">

PR-Agent offers comprehensive pull request functionalities integrated with various git providers:

|                                                         |                                                                                        | GitHub | GitLab | Bitbucket | Azure DevOps | Gitea |
|---------------------------------------------------------|----------------------------------------------------------------------------------------|:------:|:------:|:---------:|:------------:|:-----:|
| [TOOLS](https://qodo-merge-docs.qodo.ai/tools/)         | [Describe](https://qodo-merge-docs.qodo.ai/tools/describe/)                            |   ✅   |   ✅   |    ✅     |      ✅      |  ✅   |
|                                                         | [Review](https://qodo-merge-docs.qodo.ai/tools/review/)                                |   ✅   |   ✅   |    ✅     |      ✅      |  ✅   |
|                                                         | [Improve](https://qodo-merge-docs.qodo.ai/tools/improve/)                              |   ✅   |   ✅   |    ✅     |      ✅      |  ✅   |
|                                                         | [Ask](https://qodo-merge-docs.qodo.ai/tools/ask/)                                      |   ✅   |   ✅   |    ✅     |      ✅      |       |
|                                                         | ⮑ [Ask on code lines](https://qodo-merge-docs.qodo.ai/tools/ask/#ask-lines)            |   ✅   |   ✅   |           |              |       |
|                                                         | [Help Docs](https://qodo-merge-docs.qodo.ai/tools/help_docs/?h=auto#auto-approval)     |   ✅   |   ✅   |    ✅     |              |       |
|                                                         | [Update CHANGELOG](https://qodo-merge-docs.qodo.ai/tools/update_changelog/)            |   ✅   |   ✅   |    ✅     |      ✅      |       |
|                                                         |                                                                                                                     |        |        |           |              |       |
| [USAGE](https://qodo-merge-docs.qodo.ai/usage-guide/)   | [CLI](https://qodo-merge-docs.qodo.ai/usage-guide/automations_and_usage/#local-repo-cli)                            |   ✅   |   ✅   |    ✅     |      ✅      |  ✅   |
|                                                         | [App / webhook](https://qodo-merge-docs.qodo.ai/usage-guide/automations_and_usage/#github-app)                      |   ✅   |   ✅   |    ✅     |      ✅      |  ✅   |
|                                                         | [Tagging bot](https://github.com/Codium-ai/pr-agent#try-it-now)                                                     |   ✅   |        |           |              |       |
|                                                         | [Actions](https://qodo-merge-docs.qodo.ai/installation/github/#run-as-a-github-action)                              |   ✅   |   ✅   |    ✅     |      ✅      |       |
|                                                         |                                                                                                                     |        |        |           |              |       |
| [CORE](https://qodo-merge-docs.qodo.ai/core-abilities/) | [Adaptive and token-aware file patch fitting](https://qodo-merge-docs.qodo.ai/core-abilities/compression_strategy/) |   ✅   |   ✅   |    ✅     |      ✅      |       |
|                                                         | [Chat on code suggestions](https://qodo-merge-docs.qodo.ai/core-abilities/chat_on_code_suggestions/)                |   ✅   |  ✅   |           |              |       |
|                                                         | [Dynamic context](https://qodo-merge-docs.qodo.ai/core-abilities/dynamic_context/)                                  |   ✅   |   ✅   |    ✅     |      ✅      |       |
|                                                         | [Fetching ticket context](https://qodo-merge-docs.qodo.ai/core-abilities/fetching_ticket_context/)                  |   ✅    |  ✅    |     ✅     |              |       |
|                                                         | [Incremental Update](https://qodo-merge-docs.qodo.ai/core-abilities/incremental_update/)                            |   ✅    |       |           |              |       |
|                                                         | [Interactivity](https://qodo-merge-docs.qodo.ai/core-abilities/interactivity/)                                      |   ✅   |  ✅   |           |              |       |
|                                                         | [Local and global metadata](https://qodo-merge-docs.qodo.ai/core-abilities/metadata/)                               |   ✅   |   ✅   |    ✅     |      ✅      |       |
|                                                         | [Multiple models support](https://qodo-merge-docs.qodo.ai/usage-guide/changing_a_model/)                            |   ✅   |   ✅   |    ✅     |      ✅      |       |
|                                                         | [PR compression](https://qodo-merge-docs.qodo.ai/core-abilities/compression_strategy/)                              |   ✅   |   ✅   |    ✅     |      ✅      |       |
|                                                         | [RAG context enrichment](https://qodo-merge-docs.qodo.ai/core-abilities/rag_context_enrichment/)                    |   ✅    |       |    ✅     |              |       |
|                                                         | [Self reflection](https://qodo-merge-docs.qodo.ai/core-abilities/self_reflection/)                                  |   ✅   |   ✅   |    ✅     |      ✅      |       |

[//]: # (- Support for additional git providers is described in [here]&#40;./docs/Full_environments.md&#41;)
___

## See It in Action

</div>
<h4><a href="https://github.com/Codium-ai/pr-agent/pull/530">/describe</a></h4>
<div align="center">
<p float="center">
<img src="https://www.codium.ai/images/pr_agent/describe_new_short_main.png" width="512">
</p>
</div>
<hr>

<h4><a href="https://github.com/Codium-ai/pr-agent/pull/732#issuecomment-1975099151">/review</a></h4>
<div align="center">
<p float="center">
<kbd>
<img src="https://www.codium.ai/images/pr_agent/review_new_short_main.png" width="512">
</kbd>
</p>
</div>
<hr>

<h4><a href="https://github.com/Codium-ai/pr-agent/pull/732#issuecomment-1975099159">/improve</a></h4>
<div align="center">
<p float="center">
<kbd>
<img src="https://www.codium.ai/images/pr_agent/improve_new_short_main.png" width="512">
</kbd>
</p>
</div>

<div align="left">

</div>
<hr>

## Try It Now

Try the GPT-5 powered PR-Agent instantly on _your public GitHub repository_. Just mention `@CodiumAI-Agent` and add the desired command in any PR comment. The agent will generate a response based on your command.
For example, add a comment to any pull request with the following text:

```
@CodiumAI-Agent /review
```

and the agent will respond with a review of your PR.

Note that this is a promotional bot, suitable only for initial experimentation.
It does not have 'edit' access to your repo, for example, so it cannot update the PR description or add labels (`@CodiumAI-Agent /describe` will publish PR description as a comment). In addition, the bot cannot be used on private repositories, as it does not have access to the files there.


## How It Works

The following diagram illustrates PR-Agent tools and their flow:

![PR-Agent Tools](https://www.qodo.ai/images/pr_agent/diagram-v0.9.png)

## Data Privacy

### Self-hosted PR-Agent

- If you host PR-Agent with your own OpenAI API key, data handling is between you and OpenAI. You can read their API data privacy policy here:
https://openai.com/enterprise-privacy

## Contributing

To contribute to the project, get started by reading our [Contributing Guide](https://github.com/qodo-ai/pr-agent/blob/b09eec265ef7d36c232063f76553efb6b53979ff/CONTRIBUTING.md).


## ❤️ Community

This open-source release remains here as a community contribution from Qodo — the origin of modern AI-powered code collaboration. We’re proud to share it and inspire developers worldwide.

The project now has its first external maintainer, Naor ([@naorpeled](https://github.com/naorpeled)), and is currently in the process of being donated to an open-source foundation.


================================================
FILE: RELEASE_NOTES.md
================================================
## [Version 0.11] - 2023-12-07

- codiumai/pr-agent:0.11
- codiumai/pr-agent:0.11-github_app
- codiumai/pr-agent:0.11-bitbucket-app
- codiumai/pr-agent:0.11-gitlab_webhook
- codiumai/pr-agent:0.11-github_polling
- codiumai/pr-agent:0.11-github_action

### Added::Algo

- New section in `/describe` tool - [PR changes walkthrough](https://github.com/Codium-ai/pr-agent/pull/509)
- Improving PR Agent [prompts](https://github.com/Codium-ai/pr-agent/pull/501)
- Persistent tools (`/review`, `/describe`) now send an [update message](https://github.com/Codium-ai/pr-agent/pull/499) after finishing
- Add Amazon Bedrock [support](https://github.com/Codium-ai/pr-agent/pull/483)

### Fixed

- Update [dependencies](https://github.com/Codium-ai/pr-agent/pull/503) in requirements.txt for Python 3.12

## [Version 0.10] - 2023-11-15

- codiumai/pr-agent:0.10
- codiumai/pr-agent:0.10-github_app
- codiumai/pr-agent:0.10-bitbucket-app
- codiumai/pr-agent:0.10-gitlab_webhook
- codiumai/pr-agent:0.10-github_polling
- codiumai/pr-agent:0.10-github_action

### Added::Algo

- Review tool now works with [persistent comments](https://github.com/Codium-ai/pr-agent/pull/451) by default
- Bitbucket now publishes review suggestions with [code links](https://github.com/Codium-ai/pr-agent/pull/428)
- Added the ability to limit the [max number of tokens](https://github.com/Codium-ai/pr-agent/pull/437/files)
- Support ['gpt-4-1106-preview'](https://github.com/Codium-ai/pr-agent/pull/437/files) model
- Support for Google's [Vertex AI](https://github.com/Codium-ai/pr-agent/pull/436)
- Implementing [thresholds](https://github.com/Codium-ai/pr-agent/pull/423) for incremental PR reviews
- Decoupled custom labels from [PR type](https://github.com/Codium-ai/pr-agent/pull/431)

### Fixed

- Fixed bug in [parsing quotes](https://github.com/Codium-ai/pr-agent/pull/446) in CLI
- Preserve [user-added labels](https://github.com/Codium-ai/pr-agent/pull/433) in pull requests
- Bug fixes in GitLab and BitBucket

## [Version 0.9] - 2023-10-29

- codiumai/pr-agent:0.9
- codiumai/pr-agent:0.9-github_app
- codiumai/pr-agent:0.9-bitbucket-app
- codiumai/pr-agent:0.9-gitlab_webhook
- codiumai/pr-agent:0.9-github_polling
- codiumai/pr-agent:0.9-github_action

### Added::Algo

- New tool - [generate_labels](https://github.com/Codium-ai/pr-agent/blob/main/docs/GENERATE_CUSTOM_LABELS.md)
- New ability to use [custom labels](https://github.com/Codium-ai/pr-agent/blob/main/docs/GENERATE_CUSTOM_LABELS.md#how-to-enable-custom-labels) on the `review` and `describe` tools.
- New tool - [add_docs](https://github.com/Codium-ai/pr-agent/blob/main/docs/ADD_DOCUMENTATION.md)
- GitHub Action: Can now use a `.pr_agent.toml` file to control configuration parameters (see [Usage Guide](./Usage.md#working-with-github-action)).
- GitHub App: Added ability to trigger tools on [push events](https://github.com/Codium-ai/pr-agent/blob/main/Usage.md#github-app-automatic-tools-for-new-code-pr-push)
- Support custom domain URLs for Azure devops integration (see [link](https://github.com/Codium-ai/pr-agent/pull/381)).
- PR Description default mode is now in [bullet points](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L35).

### Added::Documentation

Significant documentation updates (see [Installation Guide](https://github.com/Codium-ai/pr-agent/blob/main/INSTALL.md), [Usage Guide](https://github.com/Codium-ai/pr-agent/blob/main/Usage.md), and [Tools Guide](https://github.com/Codium-ai/pr-agent/blob/main/docs/TOOLS_GUIDE.md))

### Fixed

- Fixed support for BitBucket pipeline (see [link](https://github.com/Codium-ai/pr-agent/pull/386))
- Fixed a bug in `review -i` tool
- Added blacklist for specific file extensions in `add_docs` tool (see [link](https://github.com/Codium-ai/pr-agent/pull/385/))

## [Version 0.8] - 2023-09-27

- codiumai/pr-agent:0.8
- codiumai/pr-agent:0.8-github_app
- codiumai/pr-agent:0.8-bitbucket-app
- codiumai/pr-agent:0.8-gitlab_webhook
- codiumai/pr-agent:0.8-github_polling
- codiumai/pr-agent:0.8-github_action

### Added::Algo

- GitHub Action: Can control which tools will run automatically when a new PR is created. (see usage guide: https://github.com/Codium-ai/pr-agent/blob/main/Usage.md#working-with-github-action)
- Code suggestion tool: Will try to avoid an 'add comments' suggestion  (see https://github.com/Codium-ai/pr-agent/pull/327)

### Fixed

- Gitlab: Fixed a bug of improper usage of pr_id

## [Version 0.7] - 2023-09-20

### Docker Tags

- codiumai/pr-agent:0.7
- codiumai/pr-agent:0.7-github_app
- codiumai/pr-agent:0.7-bitbucket-app
- codiumai/pr-agent:0.7-gitlab_webhook
- codiumai/pr-agent:0.7-github_polling
- codiumai/pr-agent:0.7-github_action

### Added::Algo

- New tool /similar_issue - Currently on GitHub app and CLI: indexes the issues in the repo and finds the most similar issues to the target issue.
- Describe markers: Empower the /describe tool with a templating capability (see more details in https://github.com/Codium-ai/pr-agent/pull/273).
- New feature in the /review tool - added an effort estimation to the review (https://github.com/Codium-ai/pr-agent/pull/306).

### Added::Infrastructure

- Implementation of a GitLab webhook.
- Implementation of a BitBucket app.

### Fixed

- Protection against no code suggestions generated.
- Resilience to repositories where the languages cannot be automatically detected.


================================================
FILE: SECURITY.md
================================================
# Security Policy

PR-Agent is an open-source tool to help efficiently review and handle pull requests. Qodo Merge is a paid version of PR-Agent, designed for companies and teams that require additional features and capabilities.

This document describes the security policy of PR-Agent. For Qodo Merge's security policy, see [here](https://qodo-merge-docs.qodo.ai/overview/data_privacy/#qodo-merge).

## PR-Agent Self-Hosted Solutions

When using PR-Agent with your OpenAI (or other LLM provider) API key, the security relationship is directly between you and the provider. We do not send your code to Qodo servers.

Types of [self-hosted solutions](https://qodo-merge-docs.qodo.ai/installation):

- Locally
- GitHub integration
- GitLab integration
- BitBucket integration
- Azure DevOps integration

## PR-Agent Supported Versions

This section outlines which versions of PR-Agent are currently supported with security updates.

### Docker Deployment Options

#### Latest Version

For the most recent updates, use our latest Docker image which is automatically built nightly:

```yaml
uses: qodo-ai/pr-agent@main
```

#### Specific Release Version

For a fixed version, you can pin your action to a specific release version. Browse available releases at:
[PR-Agent Releases](https://github.com/qodo-ai/pr-agent/releases)

For example, for the GitHub Action:

```yaml
steps:
  - name: PR Agent action step
    id: pragent
    uses: docker://codiumai/pr-agent:0.26-github_action
```

#### Enhanced Security with Docker Digest

For maximum security, you can specify the Docker image using its digest:

```yaml
steps:
  - name: PR Agent action step
    id: pragent
    uses: docker://codiumai/pr-agent@sha256:14165e525678ace7d9b51cda8652c2d74abb4e1d76b57c4a6ccaeba84663cc64
```

## Reporting a Vulnerability

We take the security of PR-Agent seriously. If you discover a security vulnerability, please report it immediately to:

Email: security@qodo.ai

Please include a description of the vulnerability, steps to reproduce, and the affected PR-Agent version.


================================================
FILE: action.yaml
================================================
# GitHub Action metadata for PR Agent.
name: 'Codium PR Agent'
description: 'Summarize, review and suggest improvements for pull requests'
# Icon and color used for the action's badge.
branding:
  icon: 'award'
  color: 'green'
# Docker-based action: the container is built from the Dockerfile named below.
runs:
  using: 'docker'
  image: 'Dockerfile.github_action_dockerhub'


================================================
FILE: codecov.yml
================================================
# Codecov configuration.
# Do not post coverage comments on pull requests.
comment: false
coverage:
  status:
    # Turn off both the patch and the project coverage statuses.
    patch: false
    project: false


================================================
FILE: docker/Dockerfile
================================================
# Shared base stage: Python runtime, git/curl, and the project installed from pyproject.toml.
FROM python:3.12.10-slim AS base

# Use apt-get throughout: the `apt` front end is meant for interactive use and
# does not guarantee a stable CLI for scripts. Package lists are removed to keep the layer small.
RUN apt-get update && apt-get install --no-install-recommends -y git curl && apt-get clean && rm -rf /var/lib/apt/lists/*

WORKDIR /app
# COPY rather than ADD: these are plain local files/directories, so ADD's
# archive-extraction and URL-fetching behavior is not wanted.
COPY pyproject.toml .
COPY requirements.txt .
COPY docs docs
RUN pip install --no-cache-dir . && rm pyproject.toml requirements.txt
# Each target below copies the pr_agent sources into /app; make them importable.
ENV PYTHONPATH=/app

# GitHub App server (gunicorn with uvicorn workers).
FROM base AS github_app
COPY pr_agent pr_agent
CMD ["python", "-m", "gunicorn", "-k", "uvicorn.workers.UvicornWorker", "-c", "pr_agent/servers/gunicorn_config.py", "--forwarded-allow-ips", "*", "pr_agent.servers.github_app:app"]

# Bitbucket app server.
FROM base AS bitbucket_app
COPY pr_agent pr_agent
CMD ["python", "pr_agent/servers/bitbucket_app.py"]

# Bitbucket Server webhook.
FROM base AS bitbucket_server_webhook
COPY pr_agent pr_agent
CMD ["python", "pr_agent/servers/bitbucket_server_webhook.py"]

# GitHub polling server.
FROM base AS github_polling
COPY pr_agent pr_agent
CMD ["python", "pr_agent/servers/github_polling.py"]

# GitLab webhook server.
FROM base AS gitlab_webhook
COPY pr_agent pr_agent
CMD ["python", "pr_agent/servers/gitlab_webhook.py"]

# Azure DevOps webhook server.
FROM base AS azure_devops_webhook
COPY pr_agent pr_agent
CMD ["python", "pr_agent/servers/azuredevops_server_webhook.py"]

# Gitea app server (gunicorn with uvicorn workers).
FROM base AS gitea_app
COPY pr_agent pr_agent
CMD ["python", "-m", "gunicorn", "-k", "uvicorn.workers.UvicornWorker", "-c", "pr_agent/servers/gunicorn_config.py","pr_agent.servers.gitea_app:app"]


# Test image: adds the development requirements and the test suite on top of base.
FROM base AS test
COPY requirements-dev.txt .
RUN pip install --no-cache-dir -r requirements-dev.txt && rm requirements-dev.txt
COPY pr_agent pr_agent
COPY tests tests

# CLI image: the last stage, so it is what a build without --target produces.
FROM base AS cli
COPY pr_agent pr_agent
ENTRYPOINT ["python", "pr_agent/cli.py"]


================================================
FILE: docker/Dockerfile.lambda
================================================
# Shared base stage for the AWS Lambda images: build tooling, the installed project, and mangum.
FROM public.ecr.aws/lambda/python:3.12 AS base

RUN dnf update -y && \
    dnf install -y gcc python3-devel git && \
    dnf clean all

# COPY rather than ADD for plain local files, consistent with the pr_agent copy below.
COPY pyproject.toml requirements.txt ./
RUN pip install --no-cache-dir . && rm pyproject.toml
RUN pip install --no-cache-dir mangum==0.17.0
COPY pr_agent/ ${LAMBDA_TASK_ROOT}/pr_agent/

# GitHub webhook Lambda handler.
FROM base AS github_lambda
CMD ["pr_agent.servers.github_lambda_webhook.lambda_handler"]

# GitLab webhook Lambda handler.
FROM base AS gitlab_lambda
CMD ["pr_agent.servers.gitlab_lambda_webhook.lambda_handler"]

# Unnamed final stage: a build without --target produces the GitHub Lambda image.
FROM github_lambda


================================================
FILE: docs/README.md
================================================
# [Visit Our Docs Portal](https://qodo-merge-docs.qodo.ai/)


================================================
FILE: docs/docs/.gitbook.yaml
================================================
# GitBook settings: the content root, plus the files used as README and summary.
root: ./

structure:
  readme: ../README.md
  summary: ./summary.md


================================================
FILE: docs/docs/CNAME
================================================
qodo-merge-docs.qodo.ai


================================================
FILE: docs/docs/core-abilities/compression_strategy.md
================================================

`Supported Git Platforms: GitHub, GitLab, Bitbucket`


## Overview

There are two scenarios:

1. The PR is small enough to fit in a single prompt (including system and user prompt)
2. The PR is too large to fit in a single prompt (including system and user prompt)

For both scenarios, we first use the following strategy:

#### Repo language prioritization strategy

We prioritize the languages of the repo based on the following criteria:

1. Exclude binary files and non code files (e.g. images, pdfs, etc)
2. Identify the main languages used in the repo
3. We sort the PR files by the most common languages in the repo (in descending order):
   * ```[[file.py, file2.py],[file3.js, file4.jsx],[readme.md]]```

### Small PR

In this case, we can fit the entire PR in a single prompt:

1. Exclude binary files and non code files (e.g. images, pdfs, etc)
2. We expand the surrounding context of each patch to 3 lines above and below the patch

### Large PR

#### Motivation

Pull Requests can be very long and contain a lot of information with varying degrees of relevance to the pr-agent.
We want to be able to pack as much information as possible in a single LLM prompt, while keeping the information relevant to the pr-agent.

#### Compression strategy

We prioritize additions over deletions:

* Combine all deleted files into a single list (`deleted files`)
* File patches are a list of hunks, remove all hunks of type deletion-only from the hunks in the file patch

#### Adaptive and token-aware file patch fitting

We use [tiktoken](https://github.com/openai/tiktoken) to tokenize the patches after the modifications described above, and we use the following strategy to fit the patches into the prompt:

1. Within each language we sort the files by the number of tokens in the file (in descending order):
    * ```[[file2.py, file.py],[file4.jsx, file3.js],[readme.md]]```
2. Iterate through the patches in the order described above
3. Add the patches to the prompt until the prompt reaches a certain buffer from the max token length
4. If there are still patches left, add the remaining patches as a list called `other modified files` to the prompt until the prompt reaches the max token length (hard stop), skip the rest of the patches.
5. If we haven't reached the max token length, add the `deleted files` to the prompt until the prompt reaches the max token length (hard stop), skip the rest of the patches.

#### Example

![Core Abilities](https://codium.ai/images/git_patch_logic.png){width=768}


================================================
FILE: docs/docs/core-abilities/dynamic_context.md
================================================

`Supported Git Platforms: GitHub, GitLab, Bitbucket`

PR-Agent uses an **asymmetric and dynamic context strategy** to improve AI analysis of code changes in pull requests.
It provides more context before changes than after, and dynamically adjusts the context based on code structure (e.g., enclosing functions or classes).
This approach balances providing sufficient context for accurate analysis, while avoiding needle-in-the-haystack information overload that could degrade AI performance or exceed token limits.

## Introduction

Pull request code changes are retrieved in a unified diff format, showing three lines of context before and after each modified section, with additions marked by '+' and deletions by '-'.

```diff
@@ -12,5 +12,5 @@ def func1():
 code line that already existed in the file...
 code line that already existed in the file...
 code line that already existed in the file....
-code line that was removed in the PR
+new code line added in the PR
 code line that already existed in the file...
 code line that already existed in the file...
 code line that already existed in the file...

@@ -26,2 +26,4 @@ def func2():
...
```

This unified diff format can be challenging for AI models to interpret accurately, as it provides limited context for understanding the full scope of code changes.
The presentation of code using '+', '-', and ' ' symbols to indicate additions, deletions, and unchanged lines respectively also differs from the standard code formatting typically used to train AI models.

## Challenges of expanding the context window

While expanding the context window is technically feasible, it presents a more fundamental trade-off:

Pros:

- Enhanced context allows the model to better comprehend and localize the code changes, resulting (potentially) in more precise analysis and suggestions. Without enough context, the model may struggle to understand the code changes and provide relevant feedback.

Cons:

- Excessive context may overwhelm the model with extraneous information, creating a "needle in a haystack" scenario where focusing on the relevant details (the code that actually changed) becomes challenging.
LLM quality is known to degrade when the context gets larger.
Pull requests often encompass multiple changes across many files, potentially spanning hundreds of lines of modified code. This complexity presents a genuine risk of overwhelming the model with excessive context.

- Increased context expands the token count, increasing processing time and cost, and may prevent the model from processing the entire pull request in a single pass.

## Asymmetric and dynamic context

To address these challenges, PR-Agent employs an **asymmetric** and **dynamic** context strategy, providing the model with more focused and relevant context information for each code change.

**Asymmetric:**

We start by recognizing that the context preceding a code change is typically more crucial for understanding the modification than the context following it.
Consequently, PR-Agent implements an asymmetric context policy, decoupling the context window into two distinct segments: one for the code before the change and another for the code after.

By independently adjusting each context window, PR-Agent can supply the model with a more tailored and pertinent context for individual code changes.

**Dynamic:**

We also employ a "dynamic" context strategy.
We start by recognizing that the optimal context for a code change often corresponds to its enclosing code component (e.g., function, class), rather than a fixed number of lines.
Consequently, we dynamically adjust the context window based on the code's structure, ensuring the model receives the most pertinent information for each modification.

To prevent overwhelming the model with excessive context, we impose a limit on the number of lines searched when identifying the enclosing component.
This balance allows for comprehensive understanding while maintaining efficiency and limiting context token usage.

## Appendix - relevant configuration options

```toml
[config]
patch_extension_skip_types =[".md",".txt"]  # Skip files with these extensions when trying to extend the context
allow_dynamic_context=true                  # Allow dynamic context extension
max_extra_lines_before_dynamic_context = 8  # will try to include up to X extra lines before the hunk in the patch, until we reach an enclosing function or class
patch_extra_lines_before = 3                # Number of extra lines (+3 default ones) to include before each hunk in the patch
patch_extra_lines_after = 1                 # Number of extra lines (+3 default ones) to include after each hunk in the patch
```


================================================
FILE: docs/docs/core-abilities/fetching_ticket_context.md
================================================
# Fetching Ticket Context for PRs

`Supported Git Platforms: GitHub, GitLab, Bitbucket`

!!! note "Branch-name issue linking: GitHub only (for now)"
    Extracting issue links from the **branch name** (and the optional `branch_issue_regex` setting) is currently implemented for **GitHub only**. Support for GitLab, Bitbucket, and other platforms is planned for a later release. The GitHub flow was the most relevant to implement first; other providers will follow.

## Overview

PR-Agent streamlines code review workflows by seamlessly connecting with multiple ticket management systems.
This integration enriches the review process by automatically surfacing relevant ticket information and context alongside code changes.

**Ticket systems supported**:

- [GitHub/Gitlab Issues](#githubgitlab-issues-integration)
- [Jira](#jira-integration)

**Ticket data fetched:**

1. Ticket Title
2. Ticket Description
3. Custom Fields (Acceptance criteria)
4. Subtasks (linked tasks)
5. Labels
6. Attached Images/Screenshots

## Affected Tools

Ticket Recognition Requirements:

- The PR description should contain a link to the ticket, or the branch name should start with the ticket ID / number.
- For Jira tickets, you should follow the instructions in [Jira Integration](#jira-integration) in order to authenticate with Jira.

### Describe tool

PR-Agent will recognize the ticket and use the ticket content (title, description, labels) to provide additional context for the code changes.
By understanding the reasoning and intent behind modifications, the LLM can offer more insightful and relevant code analysis.

### Review tool

Similarly to the `describe` tool, the `review` tool will use the ticket content to provide additional context for the code changes.

In addition, this feature will evaluate how well a Pull Request (PR) adheres to its original purpose/intent as defined by the associated ticket or issue mentioned in the PR description.
Each ticket will be assigned a label (Compliance/Alignment level) indicating the degree to which the PR fulfills its original purpose:

- Fully Compliant
- Partially Compliant
- Not Compliant
- PR Code Verified

![Ticket Compliance](https://www.qodo.ai/images/pr_agent/ticket_compliance_review.png){width=768}

A `PR Code Verified` label indicates the PR code meets ticket requirements, but requires additional manual testing beyond the code scope. For example - validating UI display across different environments (Mac, Windows, mobile, etc.).


#### Configuration options

-

    By default, the `review` tool will automatically validate if the PR complies with the referenced ticket.
    If you want to disable this feedback, add the following line to your configuration file:

    ```toml
    [pr_reviewer]
    require_ticket_analysis_review=false
    ```

-

    If you set:
    ```toml
    [pr_reviewer]
    check_pr_additional_content=true
    ```
    (default: `false`)

    the `review` tool will also validate that the PR code doesn't contain any additional content that is not related to the ticket. If it does, the PR will be labeled at best as `PR Code Verified`, and the `review` tool will provide a comment with the additional unrelated content found in the PR code.

## GitHub/Gitlab Issues Integration

PR-Agent will automatically recognize GitHub/Gitlab issues mentioned in the PR description and fetch the issue content.
Examples of valid GitHub/Gitlab issue references:

- `https://github.com/<ORG_NAME>/<REPO_NAME>/issues/<ISSUE_NUMBER>` or `https://gitlab.com/<ORG_NAME>/<REPO_NAME>/-/issues/<ISSUE_NUMBER>`
- `#<ISSUE_NUMBER>`
- `<ORG_NAME>/<REPO_NAME>#<ISSUE_NUMBER>`

Branch names can also be used to link issues, for example:

- `123-fix-bug` (where `123` is the issue number)

This branch-name detection applies **only when the git provider is GitHub**. Support for other platforms is planned for later.

Since PR-Agent is integrated with GitHub, it doesn't require any additional configuration to fetch GitHub issues.

## Jira Integration

We support both Jira Cloud and Jira Server/Data Center.

### Jira Cloud

#### Email/Token Authentication

You can create an API token from your Atlassian account:

1. Log in to https://id.atlassian.com/manage-profile/security/api-tokens.

2. Click Create API token.

3. From the dialog that appears, enter a name for your new token and click Create.

4. Click Copy to clipboard.

![Jira Cloud API Token](https://images.ctfassets.net/zsv3d0ugroxu/1RYvh9lqgeZjjNe5S3Hbfb/155e846a1cb38f30bf17512b6dfd2229/screenshot_NewAPIToken){width=384}

5. In your [configuration file](../usage-guide/configuration_options.md) add the following lines:

```toml
[jira]
jira_api_token = "YOUR_API_TOKEN"
jira_api_email = "YOUR_EMAIL"
```

### Jira Data Center/Server

#### Using Basic Authentication for Jira Data Center/Server

You can use your Jira username and password to authenticate with Jira Data Center/Server.

In your Configuration file/Environment variables/Secrets file, add the following lines:

```toml
[jira]
jira_api_email = "your_username"
jira_api_token = "your_password"
```

(Note that indeed the 'jira_api_email' field is used for the username, and the 'jira_api_token' field is used for the user password.)

##### Validating Basic authentication via Python script

If you are facing issues retrieving tickets in PR-Agent with Basic auth, you can validate the flow using a Python script.
The following steps will help you check if the basic auth is working correctly, and if you can access the Jira ticket details:

1. run `pip install jira==3.8.0`

2. run the following Python script (after replacing the placeholders with your actual values):

???- example "Script to validate basic auth"

    ```python
    from jira import JIRA
    
    
    if __name__ == "__main__":
        try:
            # Jira server URL
            server = "https://..."
            # Basic auth
            username = "..."
            password = "..."
            # Jira ticket code (e.g. "PROJ-123")
            ticket_id = "..."
    
            print("Initializing JiraServerTicketProvider with JIRA server")
            # Initialize JIRA client
            jira = JIRA(
                server=server,
                basic_auth=(username, password),
                timeout=30
            )
            if jira:
                print(f"JIRA client initialized successfully")
            else:
                print("Error initializing JIRA client")
    
            # Fetch ticket details
            ticket = jira.issue(ticket_id)
            print(f"Ticket title: {ticket.fields.summary}")
    
        except Exception as e:
            print(f"Error fetching JIRA ticket details: {e}")
    ```

#### Using a Personal Access Token (PAT) for Jira Data Center/Server

1. Create a [Personal Access Token (PAT)](https://confluence.atlassian.com/enterprise/using-personal-access-tokens-1026032365.html) in your Jira account
2. In your Configuration file/Environment variables/Secrets file, add the following lines:

```toml
[jira]
jira_base_url = "YOUR_JIRA_BASE_URL" # e.g. https://jira.example.com
jira_api_token = "YOUR_API_TOKEN"
```

##### Validating PAT token via Python script

If you are facing issues retrieving tickets in PR-Agent with PAT token, you can validate the flow using a Python script.
The following steps will help you check if the token is working correctly, and if you can access the Jira ticket details:

1. run `pip install jira==3.8.0`

2. run the following Python script (after replacing the placeholders with your actual values):

??? example "Script to validate PAT token"

    ```python
    from jira import JIRA
    
    
    if __name__ == "__main__":
        try:
            # Jira server URL
            server = "https://..."
            # Jira PAT token
            token_auth = "..."
            # Jira ticket code (e.g. "PROJ-123")
            ticket_id = "..."
    
            print("Initializing JiraServerTicketProvider with JIRA server")
            # Initialize JIRA client
            jira = JIRA(
                server=server,
                token_auth=token_auth,
                timeout=30
            )
            if jira:
                print(f"JIRA client initialized successfully")
            else:
                print("Error initializing JIRA client")
    
            # Fetch ticket details
            ticket = jira.issue(ticket_id)
            print(f"Ticket title: {ticket.fields.summary}")
    
        except Exception as e:
            print(f"Error fetching JIRA ticket details: {e}")
    ```


### Multi-JIRA Server Configuration

PR-Agent supports connecting to multiple JIRA servers using different authentication methods.

=== "Email/Token (Basic Auth)"

    Configure multiple servers using Email/Token authentication:

    - `jira_servers`: List of JIRA server URLs
    - `jira_api_token`: List of API tokens (for Cloud) or passwords (for Data Center)
    - `jira_api_email`: List of emails (for Cloud) or usernames (for Data Center)
    - `jira_base_url`: Default server for ticket IDs like `PROJ-123`. Each repository can configure (in its local config file) its own `jira_base_url` to choose which server to use by default.

    **Example Configuration:**
    ```toml
    [jira]
    # Server URLs
    jira_servers = ["https://company.atlassian.net", "https://datacenter.jira.com"]

    # API tokens/passwords
    jira_api_token = ["cloud_api_token_here", "datacenter_password"]

    # Emails/usernames (both required)
    jira_api_email = ["user@company.com", "datacenter_username"]

    # Default server for ticket IDs
    jira_base_url = "https://company.atlassian.net"
    ```

=== "PAT Auth"

    Configure multiple servers using Personal Access Token authentication:

    - `jira_servers`: List of JIRA server URLs
    - `jira_api_token`: List of PAT tokens
    - `jira_api_email`: Not needed (can be omitted or left empty)
    - `jira_base_url`: Default server for ticket IDs like `PROJ-123`. Each repository can configure (in its local config file) its own `jira_base_url` to choose which server to use by default.

    **Example Configuration:**
    ```toml
    [jira]
    # Server URLs
    jira_servers = ["https://server1.jira.com", "https://server2.jira.com"]

    # PAT tokens only
    jira_api_token = ["pat_token_1", "pat_token_2"]

    # Default server for ticket IDs
    jira_base_url = "https://server1.jira.com"
    ```

    **Mixed Authentication (Email/Token + PAT):**
    ```toml
    [jira]
    jira_servers = ["https://company.atlassian.net", "https://server.jira.com"]
    jira_api_token = ["cloud_api_token", "server_pat_token"]
    jira_api_email = ["user@company.com", ""]  # Empty for PAT
    ```


### How to link a PR to a Jira ticket

To integrate with Jira, you can link your PR to a ticket using either of these methods:

**Method 1: Description Reference:**

Include a ticket reference in your PR description, using either the complete URL format `https://<JIRA_ORG>.atlassian.net/browse/ISSUE-123` or the shortened ticket ID `ISSUE-123` (without prefix or suffix for the shortened ID).

**Method 2: Branch Name Detection:**

Name your branch with the ticket ID as a prefix (e.g., `ISSUE-123-feature-description` or `ISSUE-123/feature-description`).

!!! note "Jira Base URL"
    For shortened ticket IDs or branch detection (method 2 for JIRA cloud), you must configure the Jira base URL in your configuration file under the [jira] section:

    ```toml
    [jira]
    jira_base_url = "https://<JIRA_ORG>.atlassian.net"
    ```
    Where `<JIRA_ORG>` is your Jira organization identifier (e.g., `mycompany` for `https://mycompany.atlassian.net`).


================================================
FILE: docs/docs/core-abilities/index.md
================================================
# Core Abilities

PR-Agent utilizes a variety of core abilities to provide a comprehensive and efficient code review experience. These abilities include:

- [Compression strategy](./compression_strategy.md)
- [Dynamic context](./dynamic_context.md)
- [Fetching ticket context](./fetching_ticket_context.md)
- [Interactivity](./interactivity.md)
- [Local and global metadata](./metadata.md)
- [Self-reflection](./self_reflection.md)

## Blogs

Here are some additional technical blogs from Qodo, that delve deeper into the core capabilities and features of Large Language Models (LLMs) when applied to coding tasks.
These resources provide more comprehensive insights into leveraging LLMs for software development.

### Code Generation and LLMs

- [Effective AI code suggestions: less is more](https://www.codium.ai/blog/effective-code-suggestions-llms-less-is-more/)
- [State-of-the-art Code Generation with AlphaCodium – From Prompt Engineering to Flow Engineering](https://www.codium.ai/blog/qodoflow-state-of-the-art-code-generation-for-code-contests/)
- [RAG for a Codebase with 10k Repos](https://www.codium.ai/blog/rag-for-large-scale-code-repos/)

### Development Processes

- [Understanding the Challenges and Pain Points of the Pull Request Cycle](https://www.codium.ai/blog/understanding-the-challenges-and-pain-points-of-the-pull-request-cycle/)
- [Introduction to Code Coverage Testing](https://www.codium.ai/blog/introduction-to-code-coverage-testing/)

### Cost Optimization

- [Reduce Your Costs by 30% When Using GPT for Python Code](https://www.codium.ai/blog/reduce-your-costs-by-30-when-using-gpt-3-for-python-code/)


================================================
FILE: docs/docs/core-abilities/interactivity.md
================================================
# Interactivity

`Supported Git Platforms: GitHub, GitLab`

## Overview

PR-Agent transforms static code reviews into interactive experiences by enabling direct actions from pull request (PR) comments.
Developers can immediately trigger actions and apply changes with simple checkbox clicks.

This focused workflow maintains context while dramatically reducing the time between PR creation and final merge.
The approach eliminates manual steps, provides clear visual indicators, and creates immediate feedback loops all within the same interface.

## Key Interactive Features

### 1\. Interactive `/improve` Tool

The [`/improve`](../tools/improve.md) command delivers a comprehensive interactive experience:

- _**Apply this suggestion**_: Clicking this checkbox instantly converts a suggestion into a committable code change. When committed to the PR, changes made to code that was flagged for improvement will be marked with a check mark, allowing developers to easily track and review implemented recommendations.

- _**More**_: Triggers generation of additional suggestions, while keeping each suggestion as focused and relevant as the original set

- _**Update**_: Triggers a re-analysis of the code, providing updated suggestions based on the latest changes

- _**Author self-review**_: Interactive acknowledgment that developers have opened and reviewed collapsed suggestions

### 2\. Interactive `/help` Tool

The [`/help`](../tools/help.md) command not only lists available tools and their descriptions but also enables immediate tool invocation through interactive checkboxes.
When a user checks a tool's checkbox, PR-Agent instantly triggers that tool without requiring additional commands.
This transforms the standard help menu into an interactive launch pad for all PR-Agent capabilities, eliminating context switching by keeping developers within their PR workflow.


================================================
FILE: docs/docs/core-abilities/metadata.md
================================================
# Local and global metadata injection with multi-stage analysis

`Supported Git Platforms: GitHub, GitLab, Bitbucket`

1\.
PR-Agent initially retrieves for each PR the following data:

- PR title and branch name
- PR original description
- Commit messages history
- PR diff patches, in [hunk diff](https://loicpefferkorn.net/2014/02/diff-files-what-are-hunks-and-how-to-extract-them/) format
- The entire content of the files that were modified in the PR

!!! tip "Tip: Organization-level metadata"
    In addition to the inputs above, PR-Agent can incorporate supplementary preferences provided by the user, like [`extra_instructions` and `organization best practices`](../tools/improve.md#extra-instructions-and-best-practices). This information can be used to enhance the PR analysis.

2\.
By default, the first command that PR-Agent executes is [`describe`](../tools/describe.md), which generates three types of outputs:

- PR Type (e.g. bug fix, feature, refactor, etc)
- PR Description - a bullet point summary of the PR
- Changes walkthrough - for each modified file, provide a one-line summary followed by a detailed bullet point list of the changes.

These AI-generated outputs are now considered as part of the PR metadata, and can be used in subsequent commands like `review` and `improve`.
This effectively enables multi-stage chain-of-thought analysis, without doing any additional API calls which will cost time and money.

For example, when generating code suggestions for different files, PR-Agent can inject the AI-generated ["Changes walkthrough"](https://github.com/qodo-ai/pr-agent/pull/1202#issue-2511546839) file summary in the prompt:

```diff
## File: 'src/file1.py'
### AI-generated file summary:
- edited function `func1` that does X
- Removed function `func2` that was not used
- ....

@@ ... @@ def func1():
__new hunk__
11  unchanged code line0
12  unchanged code line1
13 +new code line2 added
14  unchanged code line3
__old hunk__
 unchanged code line0
 unchanged code line1
-old code line2 removed
 unchanged code line3

@@ ... @@ def func2():
__new hunk__
...
__old hunk__
...
```

3\. The entire PR files that were retrieved are also used to expand and enhance the PR context (see [Dynamic Context](./dynamic_context.md)).

4\. All the metadata described above represents several levels of cumulative analysis - ranging from hunk level, to file level, to PR level, to organization level.
This comprehensive approach enables PR-Agent AI models to generate more precise and contextually relevant suggestions and feedback.


================================================
FILE: docs/docs/core-abilities/self_reflection.md
================================================
`Supported Git Platforms: GitHub, GitLab, Bitbucket`

PR-Agent implements a **self-reflection** process where the AI model reflects, scores, and re-ranks its own suggestions, eliminating irrelevant or incorrect ones.
This approach improves the quality and relevance of suggestions, saving users time and enhancing their experience.
Configuration options allow users to set a score threshold for further filtering out suggestions.

## Introduction - Efficient Review with Hierarchical Presentation

Given that not all generated code suggestions will be relevant, it is crucial to enable users to review them in a fast and efficient way, allowing quick identification and filtering of non-applicable ones.

To achieve this goal, PR-Agent offers a dedicated hierarchical structure when presenting suggestions to users:

- A "category" section groups suggestions by their category, allowing users to quickly dismiss irrelevant suggestions.
- Each suggestion is first described by a one-line summary, which can be expanded to a full description by clicking on a collapsible.
- Upon expanding a suggestion, the user receives a more comprehensive description, and a code snippet demonstrating the recommendation.

!!! note "Fast Review"
    This hierarchical structure is designed to facilitate rapid review of each suggestion, with users spending an average of ~5-10 seconds per item.

## Self-reflection and Re-ranking

The AI model is initially tasked with generating suggestions, and outputting them in order of importance.
However, in practice we observe that models often struggle to simultaneously generate high-quality code suggestions and rank them well in a single pass.
Furthermore, the initial set of generated suggestions sometimes contains easily identifiable errors.

To address these issues, we implemented a "self-reflection" process that refines suggestion ranking and eliminates irrelevant or incorrect proposals.
This process consists of the following steps:

1. Presenting the generated suggestions to the model in a follow-up call.
2. Instructing the model to score each suggestion on a scale of 0-10 and provide a rationale for the assigned score.
3. Utilizing these scores to re-rank the suggestions and filter out incorrect ones (with a score of 0).
4. Optionally, filtering out all suggestions below a user-defined score threshold.

Note that presenting all generated suggestions simultaneously provides the model with a comprehensive context, enabling it to make more informed decisions compared to evaluating each suggestion individually.

To conclude, the self-reflection process enables PR-Agent to prioritize suggestions based on their importance, eliminate inaccurate or irrelevant proposals, and optionally exclude suggestions that fall below a specified threshold of significance.
This results in a more refined and valuable set of suggestions for the user, saving time and improving the overall experience.

## Example Results

![self_reflection](https://codium.ai/images/pr_agent/self_reflection1.png){width=768}
![self_reflection](https://codium.ai/images/pr_agent/self_reflection2.png){width=768}

## Appendix - Relevant Configuration Options

```toml
[pr_code_suggestions]
suggestions_score_threshold = 0 # Filter out suggestions with a score below this threshold (0-10)
```


================================================
FILE: docs/docs/css/custom.css
================================================
/* Neutral color scheme - ready for future branding */
/* Base values for the primary, accent and typeset-link custom properties.
   The [data-md-color-scheme="slate"] rule re-declares all three. */
:root {
    --md-primary-fg-color: #0f172a;
    --md-accent-fg-color: #1d4ed8;
    --md-typeset-a-color: #1e40af;
}

/* Slate scheme palette: re-declares the three base properties and also sets
   the default background, the default foreground (plus three variants of the
   same color at 0.7 / 0.5 / 0.3 alpha) and the code background. */
[data-md-color-scheme="slate"] {
    --md-primary-fg-color: #0b1220;
    --md-accent-fg-color: #38bdf8;
    --md-typeset-a-color: #7dd3fc;
    --md-default-bg-color: #0b1220;
    --md-default-fg-color: #e5e7eb;
    --md-default-fg-color--light: rgba(229, 231, 235, 0.7);
    --md-default-fg-color--lighter: rgba(229, 231, 235, 0.5);
    --md-default-fg-color--lightest: rgba(229, 231, 235, 0.3);
    --md-code-bg-color: #0f172a;
}

/* Primary navigation container. It is positioned so that its absolutely
   positioned ::before pseudo-element is laid out relative to it. */
.md-nav--primary {
    position: relative;
}

/* Links inside the primary navigation use an 18px font size.
   Written as a flat descendant selector instead of a nested rule: the
   specificity is the same, but the declaration is also honoured by browsers
   that lack native CSS nesting support (which would drop a nested rule). */
.md-nav--primary .md-nav__link {
    font-size: 18px;
}

/* Empty pseudo-element rendered as a 2px-wide vertical rule: pinned to the
   top, offset 10px from the right edge, and as tall as its containing block. */
.md-nav--primary::before {
    content: "";
    position: absolute;
    right: 10px;
    top: 0;
    height: 100%;
    width: 2px;
    background-color: #e5e7eb;
}

/* Slate scheme: the same rule in a darker color. */
[data-md-color-scheme="slate"] .md-nav--primary::before {
    background-color: #1f2937;
}

/* Slate scheme: header bar background. */
[data-md-color-scheme="slate"] .md-header {
    background-color: #0d1b36;
}

/* Slate scheme: tab bar background with a faint 1px line along its top edge. */
[data-md-color-scheme="slate"] .md-tabs {
    border-top: 1px solid rgba(148, 163, 184, 0.25);
    background-color: #0b1220;
}

/* Slate scheme: resting color of tab links. */
[data-md-color-scheme="slate"] .md-tabs__link {
    color: #e2e8f0;
}

/* Slate scheme: hovered and active tab links turn white and gain an
   underline drawn 0.25rem away from the text. */
[data-md-color-scheme="slate"] .md-tabs__link:hover,
[data-md-color-scheme="slate"] .md-tabs__link--active {
    text-decoration: underline;
    text-underline-offset: 0.25rem;
    color: #ffffff;
}

/* Slate scheme: search form gets a dark fill and a translucent 1px outline. */
[data-md-color-scheme="slate"] .md-search__form {
    border: 1px solid rgba(148, 163, 184, 0.4);
    background-color: #0f172a;
}

/* Slate scheme: color of text typed into the search input. */
[data-md-color-scheme="slate"] .md-search__input {
    color: #e2e8f0;
}

/* Slate scheme: placeholder uses the same color at 0.7 alpha. */
[data-md-color-scheme="slate"] .md-search__input::placeholder {
    color: rgba(226, 232, 240, 0.7);
}

/* Slate scheme: search icon uses the same color at 0.85 alpha. */
[data-md-color-scheme="slate"] .md-search__icon {
    color: rgba(226, 232, 240, 0.85);
}

/* Tab links use an 18px font size. */
.md-tabs__link {
    font-size: 18px;
}

/* Header title: 20px font, with a 12px left margin forced via !important. */
.md-header__title {
    margin-left: 12px !important;
    font-size: 20px;
}

/* Hide the logo both in the header button and in the navigation title. */
.md-nav__title .md-logo,
.md-header__button.md-logo {
    display: none;
}

/* Images in the content area get a 1px solid border plus a 1px solid
   outline, both in translucent dark tones. */
.md-content img {
    outline-color: rgba(15, 23, 42, 0.25);
    outline-style: solid;
    outline-width: 1px;
    border-color: rgba(15, 23, 42, 0.2);
    border-style: solid;
    border-width: 1px;
}

/* Slate scheme: switch the border and outline to translucent light tones. */
[data-md-color-scheme="slate"] .md-content img {
    outline-color: rgba(226, 232, 240, 0.3);
    border-color: rgba(226, 232, 240, 0.2);
}

/* Banner background color. */
.md-banner {
    background-color: #1d4ed8;
}

/* Slate scheme: banner background color. */
[data-md-color-scheme="slate"] .md-banner {
    background-color: #2563eb;
}

/* Banner links inherit the surrounding text color and stay underlined in
   their normal, hovered and focused states. */
.md-banner .md-typeset a:focus,
.md-banner .md-typeset a:hover,
.md-banner .md-typeset a {
    text-decoration: underline;
    color: currentColor;
}


================================================
FILE: docs/docs/faq/index.md
================================================
# FAQ

??? note "Q: Can PR-Agent serve as a substitute for a human reviewer?"
    #### Answer:<span style="display:none;">1</span>

    PR-Agent is designed to assist, not replace, human reviewers.

    Reviewing PRs is a tedious and time-consuming task often seen as a "chore". In addition, the longer the PR – the shorter the relative feedback, since long PRs can overwhelm reviewers, both in terms of technical difficulty, and the actual review time.
    PR-Agent aims to address these pain points, and to assist and empower both the PR author and reviewer.

    However, PR-Agent has built-in safeguards to ensure the developer remains in the driver's seat. For example:

    1. Preserves user's original PR header
    2. Places user's description above the AI-generated PR description
    3. Won't approve PRs; approval remains reviewer's responsibility
    4. The code suggestions are optional, and aim to:
        - Encourage self-review and self-reflection
        - Highlight potential bugs or oversights
        - Enhance code quality and promote best practices

    Read more about this issue in our [blog](https://www.qodo.ai/blog/understanding-the-challenges-and-pain-points-of-the-pull-request-cycle/)

___

??? note "Q: I received an incorrect or irrelevant suggestion. Why?"

    #### Answer:<span style="display:none;">2</span>

    - Modern AI models, like Claude Sonnet and GPT-5, are improving rapidly but remain imperfect. Users should critically evaluate all suggestions rather than accepting them automatically.
    - AI errors are rare, but possible. A main value from reviewing the code suggestions lies in their high probability of catching **mistakes or bugs made by the PR author**. We believe it's worth spending 30-60 seconds reviewing suggestions, even if some aren't relevant, as this practice can enhance code quality and prevent bugs in production.


    - The hierarchical structure of the suggestions is designed to help the user _quickly_ understand them, and to decide which ones are relevant and which are not:

        - Only if the `Category` header is relevant, the user should move to the summarized suggestion description.
        - Only if the summarized suggestion description is relevant, the user should click on the collapsible, to read the full suggestion description with a code preview example.

    - In addition, we recommend using the [`extra_instructions`](../tools/improve.md#extra-instructions-and-best-practices) field to guide the model toward suggestions that are more relevant to the specific needs of the project.

___

??? note "Q: How can I get more tailored suggestions?"
    #### Answer:<span style="display:none;">3</span>

    See [here](../tools/improve.md#extra-instructions-and-best-practices) for more information on how to use the `extra_instructions` and `best_practices` configuration options, to guide the model to more tailored suggestions.

___

??? note "Q: Will you store my code? Are you using my code to train models?"
    #### Answer:<span style="display:none;">4</span>

    No. PR-Agent's strict privacy policy ensures that your code is not stored or used for training purposes.

    For a detailed overview of our data privacy policy, please refer to [this link](../overview/data_privacy.md)

___

??? note "Q: Can PR-Agent review draft/offline PRs?"
    #### Answer:<span style="display:none;">6</span>

    Yes. While PR-Agent won't automatically review draft PRs, you can still get feedback by manually requesting it through [online commenting](../usage-guide/automations_and_usage.md#online-usage).

    For active PRs, you can customize the automatic feedback settings [here](../usage-guide/automations_and_usage.md#pr-agent-automatic-feedback) to match your team's workflow.
___

??? note "Q: Can the 'Review effort' feedback be calibrated or customized?"
    #### Answer:<span style="display:none;">7</span>

    Yes, you can customize review effort estimates using the `extra_instructions` configuration option (see [documentation](../tools/review.md#configuration-options)).
    
    Example mapping:

    - Effort 1: < 30 minutes review time
    - Effort 2: 30-60 minutes review time
    - Effort 3: 60-90 minutes review time
    - ...
    
    Note: The effort levels (1-5) are primarily meant for _comparative_ purposes, helping teams prioritize reviewing smaller PRs first. The actual review duration may vary, as the focus is on providing consistent relative effort estimates.

___

??? note "Q: How to reduce the noise generated by PR-Agent?"
    #### Answer:<span style="display:none;">3</span>

    The default configuration of PR-Agent is designed to balance helpful feedback with noise reduction. It reduces noise through several approaches:

    - Auto-feedback uses three highly structured tools (`/describe`, `/review`, and `/improve`), designed to be accessible at a glance without creating large visual overload
    - Suggestions are presented in a table format rather than as committable comments, which are far noisier
    - The 'File Walkthrough' section is folded by default, as it tends to be verbose
    - Intermediate comments are avoided when creating new PRs (like "PR-Agent is now reviewing your PR..."), which would generate email noise
    
    From our experience, especially in large teams or organizations, complaints about "noise" sometimes stem from the following issues:

    - **Feedback from multiple bots**: When multiple bots provide feedback on the same PR, it creates confusion and noise. We recommend using PR-Agent as the primary feedback tool to streamline the process and reduce redundancy.
    - **Getting familiar with the tool**: Unlike many tools that provide feedback only on demand, PR-Agent automatically analyzes and suggests improvements for every code change. While this proactive approach can feel intimidating at first, it's designed to continuously enhance code quality and catch bugs and problems when they occur. We recommend reviewing [this guide](../tools/improve.md#understanding-ai-code-suggestions) to help align expectations and maximize the value of PR-Agent's auto-feedback.

    Therefore, at a global configuration level, we recommend using the default configuration, which is designed to reduce noise while providing valuable feedback.
    
    However, if you still find the feedback too noisy, you can adjust the configuration. Since each user and team has different needs, it's definitely possible - and even recommended - to adjust configurations for specific repos as needed.
    Ways to adjust the configuration for noise reduction include for example:

    - [Score thresholds for code suggestions](../tools/improve.md#configuration-options)
    - [Utilizing the `extra_instructions` field for more tailored feedback](../tools/improve.md#extra-instructions-and-best-practices)
    - [Controlling which tools run automatically](../usage-guide/automations_and_usage.md#github-app-automatic-tools-when-a-new-pr-is-opened)

    Note that some users may prefer the opposite - more thorough and detailed feedback. PR-Agent is designed to be flexible and customizable, allowing you to tailor the feedback to your team's specific needs and preferences.
    Examples of ways to increase feedback include:

    - [Dual-publishing mode](../tools/improve.md#dual-publishing-mode)
    - [Interactive usage](../core-abilities/interactivity.md)
___


================================================
FILE: docs/docs/index.md
================================================
# Overview

[PR-Agent](https://github.com/qodo-ai/pr-agent) is an open-source, AI-powered code review agent and a community-maintained legacy project of Qodo. It is distinct from Qodo's primary AI code review offering, which provides a feature-rich, context-aware experience. Qodo now offers a free tier that integrates seamlessly with GitHub, GitLab, Bitbucket, and Azure DevOps for high-quality automated reviews.

- See the [Installation Guide](./installation/index.md) for instructions on installing and running the tool on different git platforms.

- See the [Usage Guide](./usage-guide/index.md) for instructions on running commands via different interfaces, including _CLI_, _online usage_, or by _automatically triggering_ them when a new PR is opened.

- See the [Tools Guide](./tools/index.md) for a detailed description of the different tools.

## Docs Smart Search

To search the documentation site using natural language:

1) Comment `/help "your question"` in a pull request where PR-Agent is installed

2) The bot will respond with an [answer](https://github.com/qodo-ai/pr-agent/pull/1241#issuecomment-2365259334) that includes relevant documentation links.

## Features

PR-Agent offers comprehensive pull request functionalities integrated with various git providers:

|       |                                                                                       | GitHub | GitLab | Bitbucket | Azure DevOps | Gitea |
| ----- |---------------------------------------------------------------------------------------|:------:|:------:|:---------:|:------------:|:-----:|
| [TOOLS](./tools/index.md) | [Describe](./tools/describe.md)                                     |   ✅   |   ✅   |    ✅     |      ✅       |  ✅   |
|       | [Review](./tools/review.md)                                                           |   ✅   |   ✅   |    ✅     |      ✅       |  ✅   |
|       | [Improve](./tools/improve.md)                                                         |   ✅   |   ✅   |    ✅     |      ✅       |  ✅   |
|       | [Ask](./tools/ask.md)                                                                 |   ✅   |   ✅   |    ✅     |      ✅       |       |
|       | ⮑ [Ask on code lines](./tools/ask.md#ask-lines)                                       |   ✅   |   ✅   |           |              |       |
|       | [Add Docs](./tools/add_docs.md)                                                       |   ✅   |   ✅   |    ✅     |      ✅       |       |
|       | [Generate Labels](./tools/generate_labels.md)                                         |   ✅   |   ✅   |    ✅     |      ✅       |       |
|       | [Similar Issues](./tools/similar_issues.md)                                           |   ✅   |        |           |              |       |
|       | [Help](./tools/help.md)                                                               |   ✅   |   ✅   |    ✅     |      ✅       |       |
|       | [Help Docs](./tools/help_docs.md)                                                     |   ✅   |   ✅   |    ✅     |              |       |
|       | [Update CHANGELOG](./tools/update_changelog.md)                                       |   ✅   |   ✅   |    ✅     |      ✅       |       |
|       |                                                                                       |        |        |           |              |       |
| [USAGE](./usage-guide/index.md) | [CLI](./usage-guide/automations_and_usage.md#local-repo-cli)      |   ✅   |   ✅   |    ✅     |      ✅       |  ✅   |
|       | [App / webhook](./usage-guide/automations_and_usage.md#github-app)                    |   ✅   |   ✅   |    ✅     |      ✅       |  ✅   |
|       | [Tagging bot](https://github.com/qodo-ai/pr-agent#try-it-now)                       |   ✅   |        |           |              |       |
|       | [Actions](./installation/github.md#run-as-a-github-action)                            |   ✅   |   ✅   |    ✅     |      ✅       |       |
|       |                                                                                       |        |        |           |              |       |
| [CORE](./core-abilities/index.md) | [Adaptive and token-aware file patch fitting](./core-abilities/compression_strategy.md) |   ✅   |   ✅   |    ✅     |      ✅       |       |
|       | [Chat on code suggestions](./core-abilities/interactivity.md)                         |   ✅   |  ✅   |           |              |       |
|       | [Compression strategy](./core-abilities/compression_strategy.md)                      |   ✅   |   ✅   |    ✅     |      ✅       |       |
|       | [Dynamic context](./core-abilities/dynamic_context.md)                                |   ✅   |   ✅   |    ✅     |      ✅       |       |
|       | [Fetching ticket context](./core-abilities/fetching_ticket_context.md)                |   ✅   |  ✅   |    ✅     |              |       |
|       | [Interactivity](./core-abilities/interactivity.md)                                    |   ✅   |  ✅   |           |              |       |
|       | [Local and global metadata](./core-abilities/metadata.md)                             |   ✅   |   ✅   |    ✅     |      ✅       |       |
|       | [Multiple models support](./usage-guide/changing_a_model.md)                          |   ✅   |   ✅   |    ✅     |      ✅       |       |
|       | [Self reflection](./core-abilities/self_reflection.md)                                |   ✅   |   ✅   |    ✅     |      ✅       |       |

## Example Results

<hr>

#### [/describe](https://github.com/qodo-ai/pr-agent/pull/530)

<figure markdown="1">
![/describe](https://www.codium.ai/images/pr_agent/describe_new_short_main.png){width=512}
</figure>
<hr>

#### [/review](https://github.com/qodo-ai/pr-agent/pull/732#issuecomment-1975099151)

<figure markdown="1">
![/review](https://www.codium.ai/images/pr_agent/review_new_short_main.png){width=512}
</figure>
<hr>

#### [/improve](https://github.com/qodo-ai/pr-agent/pull/732#issuecomment-1975099159)

<figure markdown="1">
![/improve](https://www.codium.ai/images/pr_agent/improve_new_short_main.png){width=512}
</figure>
<hr>

## How it Works

The following diagram illustrates PR-Agent tools and their flow:

![PR-Agent Tools](https://codium.ai/images/pr_agent/diagram-v0.9.png)

Check out the [PR Compression strategy](core-abilities/compression_strategy.md) page for more details on how we convert a code diff to a manageable LLM prompt.


================================================
FILE: docs/docs/installation/azure.md
================================================
## Azure DevOps Pipeline

You can use a pre-built Action Docker image to run PR-Agent as an Azure DevOps pipeline.
Add the following file to your repository under `azure-pipelines.yml`:

```yaml
# Opt out of CI triggers
trigger: none

# Configure PR trigger
# pr:
#   branches:
#     include:
#     - '*'
#   autoCancel: true
#   drafts: false

# NOTE for Azure Repos Git:
# Azure Repos does not honor YAML pr: triggers. Configure Build Validation
# via Branch Policies instead (see note below). You can safely omit pr:.

stages:
- stage: pr_agent
  displayName: 'PR Agent Stage'
  jobs:
  - job: pr_agent_job
    displayName: 'PR Agent Job'
    pool:
      vmImage: 'ubuntu-latest'
    container:
      image: codiumai/pr-agent:latest
      options: --entrypoint ""
    variables:
      - group: pr_agent
    steps:
    - script: |
        echo "Running PR Agent action step"

        # Construct PR_URL
        PR_URL="${SYSTEM_COLLECTIONURI}${SYSTEM_TEAMPROJECT}/_git/${BUILD_REPOSITORY_NAME}/pullrequest/${SYSTEM_PULLREQUEST_PULLREQUESTID}"
        echo "PR_URL=$PR_URL"

        # Extract organization URL from System.CollectionUri
        ORG_URL=$(echo "$(System.CollectionUri)" | sed 's/\/$//') # Remove trailing slash if present
        echo "Organization URL: $ORG_URL"

        export azure_devops__org="$ORG_URL"
        export config__git_provider="azure"

        pr-agent --pr_url="$PR_URL" describe
        pr-agent --pr_url="$PR_URL" review
        pr-agent --pr_url="$PR_URL" improve
      env:
        azure_devops__pat: $(azure_devops_pat)
        openai__key: $(OPENAI_KEY)
      displayName: 'Run PR-Agent'
```

This script will run PR-Agent on every new pull request, with the `describe`, `review`, and `improve` commands.
Note that you need to define the `azure_devops_pat` and `OPENAI_KEY` variables in the Azure DevOps pipeline settings (Pipelines -> Library -> + Variable group):

![PR-Agent](https://codium.ai/images/pr_agent/azure_devops_pipeline_secrets.png){width=468}

Make sure to give pipeline permissions to the `pr_agent` variable group.

> Note that Azure Pipelines lacks support for triggering workflows from PR comments. If you find a viable solution, please contribute it to our [issue tracker](https://github.com/qodo-ai/pr-agent/issues)

### Azure Repos Git PR triggers and Build Validation

Azure Repos Git does not use YAML `pr:` triggers for pipelines. Instead, configure Build Validation on the target branch to run the PR Agent pipeline for pull requests:

1. Go to Project Settings → Repositories → Branches.
2. Select the target branch and open Branch Policies.
3. Under Build Validation, add a policy:
   - Select the PR Agent pipeline (the `azure-pipelines.yml` above).
   - Set it as Required.
4. Remove the `pr:` section from your YAML (not needed for Azure Repos Git).

This distinction applies specifically to Azure Repos Git. Other providers like GitHub and Bitbucket Cloud can use YAML-based PR triggers.

## Azure DevOps from CLI

To use Azure DevOps provider use the following settings in configuration.toml:

```toml
[config]
git_provider="azure"
```

Azure DevOps provider supports [PAT token](https://learn.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate?view=azure-devops&tabs=Windows) or [DefaultAzureCredential](https://learn.microsoft.com/en-us/azure/developer/python/sdk/authentication-overview#authentication-in-server-environments) authentication.
A PAT is faster to create, but has a built-in expiration date and uses the user's identity for API calls.
With DefaultAzureCredential you can use a managed identity or a service principal, which are more secure and create a separate ADO user identity (via AAD) for the agent.

If PAT was chosen, you can assign the value in .secrets.toml.
If DefaultAzureCredential was chosen, you can assign the additional environment variables, such as `AZURE_CLIENT_SECRET`, directly,
or use a managed identity or the Azure CLI (for local development) without any additional configuration.
In any case, the `org` value must be assigned in .secrets.toml:

```toml
[azure_devops]
org = "https://dev.azure.com/YOUR_ORGANIZATION/"
# pat = "YOUR_PAT_TOKEN" needed only if using PAT for authentication
```

## Azure DevOps Webhook

To trigger from an Azure webhook, you need to manually [add a webhook](https://learn.microsoft.com/en-us/azure/devops/service-hooks/services/webhooks?view=azure-devops).
Use the "Pull request created" type to trigger a review, or "Pull request commented on" to trigger any supported command via a `/<command> <args>` comment on the relevant PR. Note that for the "Pull request commented on" trigger, only API v2.0 is supported.

For webhook security, create a random username/password pair and configure it on both the server and the Azure DevOps webhook. The webhook sends these credentials as Basic Auth data with each request:

```toml
[azure_devops_server]
webhook_username = "<basic auth user>"
webhook_password = "<basic auth password>"
```

> :warning: **Ensure that the webhook endpoint is only accessible over HTTPS** to mitigate the risk of credential interception when using basic authentication.


================================================
FILE: docs/docs/installation/bitbucket.md
================================================
## Run as a Bitbucket Pipeline

You can use the Bitbucket Pipeline system to run PR-Agent on every pull request open or update.

1. Add the following file to your repository as `bitbucket-pipelines.yml`:

```yaml
pipelines:
    pull-requests:
      '**':
        - step:
            name: PR Agent Review
            image: codiumai/pr-agent:latest
            script:
              - pr-agent --pr_url=https://bitbucket.org/$BITBUCKET_WORKSPACE/$BITBUCKET_REPO_SLUG/pull-requests/$BITBUCKET_PR_ID review
```

2. Add the following secure variables to your repository under Repository settings > Pipelines > Repository variables.

   - CONFIG__GIT_PROVIDER: `bitbucket`
   - OPENAI__KEY: `<your key>`
   - BITBUCKET__AUTH_TYPE: `basic` or `bearer` (default is `bearer`)
   - BITBUCKET__BEARER_TOKEN: `<your token>` (required when auth_type is bearer)
   - BITBUCKET__BASIC_TOKEN: `<your token>` (required when auth_type is basic)

You can get a Bitbucket token for your repository by following Repository Settings -> Security -> Access Tokens.
For basic auth, you can generate a base64 encoded token from your username:password combination.

Note that comments on a PR are not supported in Bitbucket Pipeline.

## Bitbucket Server and Data Center

Log in to your on-prem instance of Bitbucket with your service account username and password.
Navigate to `Manage account`, `HTTP Access tokens`, `Create Token`.
Generate the token and add it to `.secrets.toml` under the `bitbucket_server` section:

```toml
[bitbucket_server]
bearer_token = "<your key>"
```

Don't forget to also set the URL of your Bitbucket Server instance (either in `.secrets.toml` or in `configuration.toml`):

```toml
[bitbucket_server]
url = "<full URL to your Bitbucket instance, e.g.: https://git.bitbucket.com>"
```

### Run it as CLI

Modify `configuration.toml`:

```toml
git_provider="bitbucket_server"
```


and pass the Pull request URL:

```shell
python cli.py --pr_url https://git.on-prem-instance-of-bitbucket.com/projects/PROJECT/repos/REPO/pull-requests/1 review
```

### Run it as service

To run PR-Agent as webhook, build the docker image:

```bash
docker build . -t codiumai/pr-agent:bitbucket_server_webhook --target bitbucket_server_webhook -f docker/Dockerfile
docker push codiumai/pr-agent:bitbucket_server_webhook  # Push to your Docker repository
```

Navigate to `Projects` or `Repositories`, `Settings`, `Webhooks`, `Create Webhook`.
Fill in the name and URL. For Authentication, select 'None'. Select the 'Pull Request Opened' checkbox to receive that event as a webhook.

The URL should end with `/webhook`, for example: https://domain.com/webhook


================================================
FILE: docs/docs/installation/gitea.md
================================================
## Run a Gitea webhook server

1. In Gitea create a new user and give it "Reporter" role for the intended group or project.

2. For the user from step 1. generate a `personal_access_token` with `api` access.

3. Generate a random secret for your app, and save it for later (`webhook_secret`). For example, you can use:

```bash
WEBHOOK_SECRET=$(python -c "import secrets; print(secrets.token_hex(10))")
```

4. Clone this repository:

```bash
git clone https://github.com/qodo-ai/pr-agent.git
```

5. Prepare variables and secrets. Skip this step if you plan on setting these as environment variables when running the agent:
    - In the configuration file/variables:
        - Set `config.git_provider` to "gitea"
    - In the secrets file/variables:
        - Set your AI model key in the respective section
        - In the [Gitea] section, set `personal_access_token` (with token from step 2) and `webhook_secret` (with secret from step 3)

6. Build a Docker image for the app and optionally push it to a Docker repository. We'll use Dockerhub as an example:

```bash
docker build -f docker/Dockerfile -t codiumai/pr-agent:gitea_webhook --target gitea_app .
docker push codiumai/pr-agent:gitea_webhook  # Push to your Docker repository
```

7. Set the environment variables; the method depends on your Docker runtime. Skip this step if you included your secrets/configuration directly in the Docker image.

```bash
CONFIG__GIT_PROVIDER=gitea
GITEA__PERSONAL_ACCESS_TOKEN=<personal_access_token>
GITEA__WEBHOOK_SECRET=<webhook_secret>
GITEA__URL=https://gitea.com # Or self host
OPENAI__KEY=<your_openai_api_key>
GITEA__SKIP_SSL_VERIFICATION=false # or true
GITEA__SSL_CA_CERT=/path/to/cacert.pem
```

8. Create a webhook in your Gitea project. Set the URL to `http[s]://<PR_AGENT_HOSTNAME>/api/v1/gitea_webhooks`, the secret token to the generated secret from step 3, and enable the triggers `push`, `comments` and `merge request events`.

9. Test your installation by opening a merge request or commenting on a merge request using one of PR Agent's commands.


================================================
FILE: docs/docs/installation/github.md
================================================
In this page we will cover how to install and run PR-Agent as a GitHub Action or GitHub App, and how to configure it for your needs.

## Run as a GitHub Action

You can use our pre-built GitHub Action Docker image to run PR-Agent as a GitHub Action.

1) Add the following file to your repository under `.github/workflows/pr_agent.yml`:

```yaml
on:
  pull_request:
    types: [opened, reopened, ready_for_review]
  issue_comment:
jobs:
  pr_agent_job:
    if: ${{ github.event.sender.type != 'Bot' }}
    runs-on: ubuntu-latest
    permissions:
      issues: write
      pull-requests: write
      contents: write
    name: Run pr agent on every pull request, respond to user comments
    steps:
      - name: PR Agent action step
        id: pragent
        uses: qodo-ai/pr-agent@main
        env:
          OPENAI_KEY: ${{ secrets.OPENAI_KEY }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
```

2) Add the following secret to your repository under `Settings > Secrets and variables > Actions > New repository secret > Add secret`:

```
Name = OPENAI_KEY
Secret = <your key>
```

The GITHUB_TOKEN secret is automatically created by GitHub.

3) Merge this change to your main branch.
When you open your next PR, you should see a comment from `github-actions` bot with a review of your PR, and instructions on how to use the rest of the tools.

4) You may configure PR-Agent by adding environment variables under the env section corresponding to any configurable property in the [configuration](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml) file. Some examples:

```yaml
      env:
        # ... previous environment values
        OPENAI.ORG: "<Your organization name under your OpenAI account>"
        PR_REVIEWER.REQUIRE_TESTS_REVIEW: "false" # Disable tests review
        PR_CODE_SUGGESTIONS.NUM_CODE_SUGGESTIONS: 6 # Increase number of code suggestions
```

See detailed usage instructions in the [USAGE GUIDE](../usage-guide/automations_and_usage.md#github-action)

### Configuration Examples

This section provides detailed, step-by-step examples for configuring PR-Agent with different models and advanced options in GitHub Actions.

#### Quick Start Examples

##### Basic Setup (OpenAI Default)

Copy this minimal workflow to get started with the default OpenAI models:

```yaml
name: PR Agent
on:
  pull_request:
    types: [opened, reopened, ready_for_review]
  issue_comment:
jobs:
  pr_agent_job:
    if: ${{ github.event.sender.type != 'Bot' }}
    runs-on: ubuntu-latest
    permissions:
      issues: write
      pull-requests: write
      contents: write
    steps:
      - name: PR Agent action step
        uses: qodo-ai/pr-agent@main
        env:
          OPENAI_KEY: ${{ secrets.OPENAI_KEY }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
```

##### Gemini Setup

Ready-to-use workflow for Gemini models:

```yaml
name: PR Agent (Gemini)
on:
  pull_request:
    types: [opened, reopened, ready_for_review]
  issue_comment:
jobs:
  pr_agent_job:
    if: ${{ github.event.sender.type != 'Bot' }}
    runs-on: ubuntu-latest
    permissions:
      issues: write
      pull-requests: write
      contents: write
    steps:
      - name: PR Agent action step
        uses: qodo-ai/pr-agent@main
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          config.model: "gemini/gemini-1.5-flash"
          config.fallback_models: '["gemini/gemini-1.5-flash"]'
          GOOGLE_AI_STUDIO.GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
          github_action_config.auto_review: "true"
          github_action_config.auto_describe: "true"
          github_action_config.auto_improve: "true"
```

##### Claude Setup

Ready-to-use workflow for Claude models:

```yaml
name: PR Agent (Claude)
on:
  pull_request:
    types: [opened, reopened, ready_for_review]
  issue_comment:
jobs:
  pr_agent_job:
    if: ${{ github.event.sender.type != 'Bot' }}
    runs-on: ubuntu-latest
    permissions:
      issues: write
      pull-requests: write
      contents: write
    steps:
      - name: PR Agent action step
        uses: qodo-ai/pr-agent@main
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          config.model: "anthropic/claude-3-opus-20240229"
          config.fallback_models: '["anthropic/claude-3-haiku-20240307"]'
          ANTHROPIC.KEY: ${{ secrets.ANTHROPIC_KEY }}
          github_action_config.auto_review: "true"
          github_action_config.auto_describe: "true"
          github_action_config.auto_improve: "true"
```

#### Basic Configuration with Tool Controls

Start with this enhanced workflow that includes tool configuration:

```yaml
on:
  pull_request:
    types: [opened, reopened, ready_for_review]
  issue_comment:
jobs:
  pr_agent_job:
    if: ${{ github.event.sender.type != 'Bot' }}
    runs-on: ubuntu-latest
    permissions:
      issues: write
      pull-requests: write
      contents: write
    name: Run pr agent on every pull request, respond to user comments
    steps:
      - name: PR Agent action step
        id: pragent
        uses: qodo-ai/pr-agent@main
        env:
          OPENAI_KEY: ${{ secrets.OPENAI_KEY }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Enable/disable automatic tools
          github_action_config.auto_review: "true"
          github_action_config.auto_describe: "true"
          github_action_config.auto_improve: "true"
          # Configure which PR events trigger the action
          github_action_config.pr_actions: '["opened", "reopened", "ready_for_review", "review_requested"]'
```

#### Switching Models

##### Using Gemini (Google AI Studio)

To use Gemini models instead of the default OpenAI models:

```yaml
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        # Set the model to Gemini
        config.model: "gemini/gemini-1.5-flash"
        config.fallback_models: '["gemini/gemini-1.5-flash"]'
        # Add your Gemini API key
        GOOGLE_AI_STUDIO.GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
        # Tool configuration
        github_action_config.auto_review: "true"
        github_action_config.auto_describe: "true"
        github_action_config.auto_improve: "true"
```

**Required Secrets:**

- Add `GEMINI_API_KEY` to your repository secrets (get it from [Google AI Studio](https://aistudio.google.com/))

**Note:** When using non-OpenAI models like Gemini, you don't need to set `OPENAI_KEY` - only the model-specific API key is required.

##### Using Claude (Anthropic)

To use Claude models:

```yaml
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        # Set the model to Claude
        config.model: "anthropic/claude-3-opus-20240229"
        config.fallback_models: '["anthropic/claude-3-haiku-20240307"]'
        # Add your Anthropic API key
        ANTHROPIC.KEY: ${{ secrets.ANTHROPIC_KEY }}
        # Tool configuration
        github_action_config.auto_review: "true"
        github_action_config.auto_describe: "true"
        github_action_config.auto_improve: "true"
```

**Required Secrets:**

- Add `ANTHROPIC_KEY` to your repository secrets (get it from [Anthropic Console](https://console.anthropic.com/))

**Note:** When using non-OpenAI models like Claude, you don't need to set `OPENAI_KEY` - only the model-specific API key is required.

##### Using Azure OpenAI

To use Azure OpenAI services:

```yaml
      env:
        OPENAI_KEY: ${{ secrets.AZURE_OPENAI_KEY }}
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        # Azure OpenAI configuration
        OPENAI.API_TYPE: "azure"
        OPENAI.API_VERSION: "2023-05-15"
        OPENAI.API_BASE: ${{ secrets.AZURE_OPENAI_ENDPOINT }}
        OPENAI.DEPLOYMENT_ID: ${{ secrets.AZURE_OPENAI_DEPLOYMENT }}
        # Set the model to match your Azure deployment
        config.model: "gpt-4o"
        config.fallback_models: '["gpt-4o"]'
        # Tool configuration
        github_action_config.auto_review: "true"
        github_action_config.auto_describe: "true"
        github_action_config.auto_improve: "true"
```

**Required Secrets:**

- `AZURE_OPENAI_KEY`: Your Azure OpenAI API key
- `AZURE_OPENAI_ENDPOINT`: Your Azure OpenAI endpoint URL
- `AZURE_OPENAI_DEPLOYMENT`: Your deployment name

##### Using Local Models (Ollama)

To use local models via Ollama:

```yaml
      env:
        OPENAI_KEY: ${{ secrets.OPENAI_KEY }}
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        # Set the model to a local Ollama model
        config.model: "ollama/qwen2.5-coder:32b"
        config.fallback_models: '["ollama/qwen2.5-coder:32b"]'
        config.custom_model_max_tokens: "128000"
        # Ollama configuration
        OLLAMA.API_BASE: "http://localhost:11434"
        # Tool configuration
        github_action_config.auto_review: "true"
        github_action_config.auto_describe: "true"
        github_action_config.auto_improve: "true"
```

**Note:** For local models, you'll need to use a self-hosted runner with Ollama installed, as GitHub Actions hosted runners cannot access localhost services.

#### Advanced Configuration Options

##### Custom Review Instructions

Add specific instructions for the review process:

```yaml
      env:
        OPENAI_KEY: ${{ secrets.OPENAI_KEY }}
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        # Custom review instructions
        pr_reviewer.extra_instructions: "Focus on security vulnerabilities and performance issues. Check for proper error handling."
        # Tool configuration
        github_action_config.auto_review: "true"
        github_action_config.auto_describe: "true"
        github_action_config.auto_improve: "true"
```

##### Language-Specific Configuration

Configure for specific programming languages:

```yaml
      env:
        OPENAI_KEY: ${{ secrets.OPENAI_KEY }}
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        # Language-specific settings
        pr_reviewer.extra_instructions: "Focus on Python best practices, type hints, and docstrings."
        pr_code_suggestions.num_code_suggestions: "8"
        pr_code_suggestions.suggestions_score_threshold: "7"
        # Tool configuration
        github_action_config.auto_review: "true"
        github_action_config.auto_describe: "true"
        github_action_config.auto_improve: "true"
```

##### Selective Tool Execution

Run only specific tools automatically:

```yaml
      env:
        OPENAI_KEY: ${{ secrets.OPENAI_KEY }}
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        # Only run review and describe, skip improve
        github_action_config.auto_review: "true"
        github_action_config.auto_describe: "true"
        github_action_config.auto_improve: "false"
        # Only trigger on PR open and reopen
        github_action_config.pr_actions: '["opened", "reopened"]'
```

#### Using Configuration Files

Instead of setting all options via environment variables, you can use a `.pr_agent.toml` file in your repository root:

1. Create a `.pr_agent.toml` file in your repository root:

```toml
[config]
model = "gemini/gemini-1.5-flash"
fallback_models = ["anthropic/claude-3-opus-20240229"]

[pr_reviewer]
extra_instructions = "Focus on security issues and code quality."

[pr_code_suggestions]
num_code_suggestions = 6
suggestions_score_threshold = 7
```

2. Use a simpler workflow file:

```yaml
on:
  pull_request:
    types: [opened, reopened, ready_for_review]
  issue_comment:
jobs:
  pr_agent_job:
    if: ${{ github.event.sender.type != 'Bot' }}
    runs-on: ubuntu-latest
    permissions:
      issues: write
      pull-requests: write
      contents: write
    name: Run pr agent on every pull request, respond to user comments
    steps:
      - name: PR Agent action step
        id: pragent
        uses: qodo-ai/pr-agent@main
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GOOGLE_AI_STUDIO.GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
          ANTHROPIC.KEY: ${{ secrets.ANTHROPIC_KEY }}
          github_action_config.auto_review: "true"
          github_action_config.auto_describe: "true"
          github_action_config.auto_improve: "true"
```

#### Troubleshooting Common Issues

##### Model Not Found Errors

If you get model not found errors:

1. **Check model name format**: Ensure you're using the correct model identifier format (e.g., `gemini/gemini-1.5-flash`, not just `gemini-1.5-flash`)

2. **Verify API keys**: Make sure your API keys are correctly set as repository secrets

3. **Check model availability**: Some models may not be available in all regions or may require specific access

##### Environment Variable Format

Remember these key points about environment variables:

- Use dots (`.`) or double underscores (`__`) to separate sections and keys
- Boolean values should be strings: `"true"` or `"false"`
- Arrays should be JSON strings: `'["item1", "item2"]'`
- Model names are case-sensitive

##### Rate Limiting

If you encounter rate limiting:

```yaml
      env:
        OPENAI_KEY: ${{ secrets.OPENAI_KEY }}
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        # Add fallback models for better reliability
        config.fallback_models: '["gpt-4o", "gpt-3.5-turbo"]'
        # Increase timeout for slower models
        config.ai_timeout: "300"
        github_action_config.auto_review: "true"
        github_action_config.auto_describe: "true"
        github_action_config.auto_improve: "true"
```

##### Common Error Messages and Solutions

**Error: "Model not found"**
- **Solution**: Check the model name format and ensure it matches the exact identifier. See the [Changing a model in PR-Agent](../usage-guide/changing_a_model.md) guide for supported models and their correct identifiers.

**Error: "API key not found"**
- **Solution**: Verify that your API key is correctly set as a repository secret and the environment variable name matches exactly
- **Note**: For non-OpenAI models (Gemini, Claude, etc.), you only need the model-specific API key, not `OPENAI_KEY`

**Error: "Rate limit exceeded"**
- **Solution**: Add fallback models or increase the `config.ai_timeout` value

**Error: "Permission denied"**
- **Solution**: Ensure your workflow has the correct permissions set:
  ```yaml
  permissions:
    issues: write
    pull-requests: write
    contents: write
  ```

**Error: "Invalid JSON format"**

- **Solution**: Check that arrays are properly formatted as JSON strings:

```yaml
# Correct (a JSON array wrapped in a quoted string):
config.fallback_models: '["model1", "model2"]'
# Incorrect (interpreted as a YAML list, not a string):
config.fallback_models: ["model1", "model2"]
```

##### Debugging Tips

1. **Enable verbose logging**: Add `config.verbosity_level: "2"` to see detailed logs
2. **Check GitHub Actions logs**: Look at the step output for specific error messages
3. **Test with minimal configuration**: Start with just the basic setup and add options one by one
4. **Verify secrets**: Double-check that all required secrets are set in your repository settings

##### Performance Optimization

For better performance with large repositories:

```yaml
      env:
        OPENAI_KEY: ${{ secrets.OPENAI_KEY }}
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        # Optimize for large PRs
        config.large_patch_policy: "clip"
        config.max_model_tokens: "32000"
        config.patch_extra_lines_before: "3"
        config.patch_extra_lines_after: "1"
        github_action_config.auto_review: "true"
        github_action_config.auto_describe: "true"
        github_action_config.auto_improve: "true"
```

#### Reference

For more detailed configuration options, see:

- [Changing a model in PR-Agent](../usage-guide/changing_a_model.md)
- [Configuration options](../usage-guide/configuration_options.md)
- [Automations and usage](../usage-guide/automations_and_usage.md#github-action)

### Using a specific release

!!! tip ""
    If you want to pin your action to a specific release (v0.23 for example) for stability reasons, use:
    ```yaml
    ...
        steps:
          - name: PR Agent action step
            id: pragent
            uses: docker://codiumai/pr-agent:0.23-github_action
    ...
    ```

    For enhanced security, you can also specify the Docker image by its [digest](https://hub.docker.com/repository/docker/codiumai/pr-agent/tags):
    ```yaml
    ...
        steps:
          - name: PR Agent action step
            id: pragent
            uses: docker://codiumai/pr-agent@sha256:14165e525678ace7d9b51cda8652c2d74abb4e1d76b57c4a6ccaeba84663cc64
    ...
    ```

### Action for GitHub enterprise server

!!! tip ""
    To use the action with a GitHub enterprise server, add an environment variable `GITHUB.BASE_URL` with the API URL of your GitHub server.

    For example, if your GitHub server is at `https://github.mycompany.com`, add the following to your workflow file:
    ```yaml
          env:
            # ... previous environment values
            GITHUB.BASE_URL: "https://github.mycompany.com/api/v3"
    ```

---

## Run as a GitHub App

Allowing you to automate the review process on your private or public repositories.

1) Create a GitHub App from the [GitHub Developer Portal](https://docs.github.com/en/developers/apps/creating-a-github-app).

   - Set the following permissions:
     - Pull requests: Read & write
     - Issue comment: Read & write
     - Metadata: Read-only
     - Contents: Read-only
   - Set the following events:
     - Issue comment
     - Pull request
     - Push (if you need to enable triggering on PR update)

2) Generate a random secret for your app, and save it for later. For example, you can use:

```bash
WEBHOOK_SECRET=$(python -c "import secrets; print(secrets.token_hex(10))")
```

3) Acquire the following pieces of information from your app's settings page:

   - App private key (click "Generate a private key" and save the file)
   - App ID

4) Clone this repository:

```bash
git clone https://github.com/qodo-ai/pr-agent.git
```

5) Copy the secrets template file and fill in the following:

```bash
cp pr_agent/settings/.secrets_template.toml pr_agent/settings/.secrets.toml
# Edit .secrets.toml file
```

- Your OpenAI key.
- Copy your app's private key to the private_key field.
- Copy your app's ID to the app_id field.
- Copy your app's webhook secret to the webhook_secret field.
- Set deployment_type to 'app' in [configuration.toml](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml)

    > The .secrets.toml file is not copied to the Docker image by default, and is only used for local development.
    > If you want to use the .secrets.toml file in your Docker image, you can remove it from the .dockerignore file.
    > In most production environments, you would inject the secrets file as environment variables or as mounted volumes.
    > For example, in order to inject a secrets file as a volume in a Kubernetes environment you can update your pod spec to include the following,
    > assuming you have a secret named `pr-agent-settings` with a key named `.secrets.toml`:

    ```
           volumes:
            - name: settings-volume
              secret:
                secretName: pr-agent-settings
    // ...
           containers:
    // ...
              volumeMounts:
                - mountPath: /app/pr_agent/settings_prod
                  name: settings-volume
    ```

    > Another option is to set the secrets as environment variables in your deployment environment, for example `OPENAI.KEY` and `GITHUB.USER_TOKEN`.

6) Build a Docker image for the app and optionally push it to a Docker repository. We'll use Dockerhub as an example:

    ```bash
    docker build . -t codiumai/pr-agent:github_app --target github_app -f docker/Dockerfile
    docker push codiumai/pr-agent:github_app  # Push to your Docker repository
    ```

7) Host the app using a server, serverless function, or container environment. Alternatively, for development and
   debugging, you may use tools like smee.io to forward webhooks to your local machine.
   For a serverless example, see [Deploy as a Lambda Function](#deploy-as-a-lambda-function).

8) Go back to your app's settings, and set the following:

   - Webhook URL: The URL of your app's server or the URL of the smee.io channel.
   - Webhook secret: The secret you generated earlier.

9) Install the app by navigating to the "Install App" tab and selecting your desired repositories.

> **Note:** When running PR-Agent from GitHub app, the default configuration file (configuration.toml) will be loaded.
> However, you can override the default tool parameters by uploading a local configuration file `.pr_agent.toml`.
> For more information please check out the [USAGE GUIDE](../usage-guide/automations_and_usage.md#github-app)

---

## Additional deployment methods

### Deploy as a Lambda Function

Note that since AWS Lambda env vars cannot have "." in the name, you can replace each "." in an env variable with "__".<br>
For example: `GITHUB.WEBHOOK_SECRET` --> `GITHUB__WEBHOOK_SECRET`

1. Follow steps 1-5 from [here](#run-as-a-github-app).
2. Build a docker image that can be used as a lambda function

    ```shell
    docker buildx build --platform=linux/amd64 . -t codiumai/pr-agent:github_lambda --target github_lambda -f docker/Dockerfile.lambda
    ```
   (Note: --target github_lambda is optional as it's the default target)


3. Push image to ECR

    ```shell
    docker tag codiumai/pr-agent:github_lambda <AWS_ACCOUNT>.dkr.ecr.<AWS_REGION>.amazonaws.com/codiumai/pr-agent:github_lambda
    docker push <AWS_ACCOUNT>.dkr.ecr.<AWS_REGION>.amazonaws.com/codiumai/pr-agent:github_lambda
    ```

4. Create a lambda function that uses the uploaded image. Set the lambda timeout to be at least 3m.
5. Configure the lambda function to have a Function URL.
6. In the environment variables of the Lambda function, specify `AZURE_DEVOPS_CACHE_DIR` to a writable location such as /tmp. (see [link](https://github.com/qodo-ai/pr-agent/pull/450#issuecomment-1840242269))
7. Go back to steps 8-9 of [Run as a GitHub App](#run-as-a-github-app) with the function URL as your Webhook URL.
    The Webhook URL would look like `https://<LAMBDA_FUNCTION_URL>/api/v1/github_webhooks`

#### Using AWS Secrets Manager

For production Lambda deployments, use AWS Secrets Manager instead of environment variables:

1. Create a secret in AWS Secrets Manager with JSON format like this:

```json
{
  "openai.key": "sk-proj-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
  "github.webhook_secret": "your-webhook-secret-from-step-2",
  "github.private_key": "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA...\n-----END RSA PRIVATE KEY-----"
}
```

2. Add IAM permission `secretsmanager:GetSecretValue` to your Lambda execution role
3. Set these environment variables in your Lambda:

```bash
AWS_SECRETS_MANAGER__SECRET_ARN=arn:aws:secretsmanager:us-east-1:123456789012:secret:pr-agent-secrets-AbCdEf
CONFIG__SECRET_PROVIDER=aws_secrets_manager
```

---

### AWS CodeCommit Setup

Not all features have been added to CodeCommit yet.  As of right now, CodeCommit has been implemented to run the PR-Agent CLI on the command line, using AWS credentials stored in environment variables.  (More features will be added in the future.)  The following is a set of instructions to have PR-Agent do a review of your CodeCommit pull request from the command line:

1. Create an IAM user that you will use to read CodeCommit pull requests and post comments
    - Note: That user should have CLI access only, not Console access
2. Add IAM permissions to that user, to allow access to CodeCommit (see IAM Role example below)
3. Generate an Access Key for your IAM user
4. Set the Access Key and Secret using environment variables (see Access Key example below)
5. Set the `git_provider` value to `codecommit` in the `pr_agent/settings/configuration.toml` settings file
6. Set the `PYTHONPATH` to include your `pr-agent` project directory
    - Option A: Add `PYTHONPATH="/PATH/TO/PROJECTS/pr-agent"` to your `.env` file
    - Option B: Set `PYTHONPATH` and run the CLI in one command, for example:
        - `PYTHONPATH="/PATH/TO/PROJECTS/pr-agent" python pr_agent/cli.py [--ARGS]`

---

##### AWS CodeCommit IAM Role Example

Example IAM permissions to that user to allow access to CodeCommit:

- Note: The following is a working example of IAM permissions that has read access to the repositories and write access to allow posting comments
- Note: If you only want pr-agent to review your pull requests, you can tighten the IAM permissions further, however this IAM example will work, and allow the pr-agent to post comments to the PR
- Note: You may want to replace the `"Resource": "*"` with your list of repos, to limit access to only those repos

```json
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "codecommit:BatchDescribe*",
                "codecommit:BatchGet*",
                "codecommit:Describe*",
                "codecommit:EvaluatePullRequestApprovalRules",
                "codecommit:Get*",
                "codecommit:List*",
                "codecommit:PostComment*",
                "codecommit:PutCommentReaction",
                "codecommit:UpdatePullRequestDescription",
                "codecommit:UpdatePullRequestTitle"
            ],
            "Resource": "*"
        }
    ]
}
```

##### AWS CodeCommit Access Key and Secret

Example setting the Access Key and Secret using environment variables

```sh
export AWS_ACCESS_KEY_ID="XXXXXXXXXXXXXXXX"
export AWS_SECRET_ACCESS_KEY="XXXXXXXXXXXXXXXX"
export AWS_DEFAULT_REGION="us-east-1"
```

##### AWS CodeCommit CLI Example

After you set up AWS CodeCommit using the instructions above, here is an example CLI run that tells pr-agent to **review** a given pull request.
(Replace your specific PYTHONPATH and PR URL in the example)

```sh
PYTHONPATH="/PATH/TO/PROJECTS/pr-agent" python pr_agent/cli.py \
  --pr_url https://us-east-1.console.aws.amazon.com/codesuite/codecommit/repositories/MY_REPO_NAME/pull-requests/321 \
  review
```

================================================
FILE: docs/docs/installation/gitlab.md
================================================
## Run as a GitLab Pipeline

You can use a pre-built Action Docker image to run PR-Agent as a GitLab pipeline. This is a simple way to get started with PR-Agent without setting up your own server.

(1) Add the following file to your repository under `.gitlab-ci.yml`:

```yaml
stages:
  - pr_agent

pr_agent_job:
  stage: pr_agent
  image:
    name: codiumai/pr-agent:latest
    entrypoint: [""]
  script:
    - cd /app
    - echo "Running PR Agent action step"
    - export MR_URL="$CI_MERGE_REQUEST_PROJECT_URL/merge_requests/$CI_MERGE_REQUEST_IID"
    - echo "MR_URL=$MR_URL"
    - export gitlab__url=$CI_SERVER_PROTOCOL://$CI_SERVER_FQDN
    - export gitlab__PERSONAL_ACCESS_TOKEN=$GITLAB_PERSONAL_ACCESS_TOKEN
    - export config__git_provider="gitlab"
    - export openai__key=$OPENAI_KEY
    - python -m pr_agent.cli --pr_url="$MR_URL" describe
    - python -m pr_agent.cli --pr_url="$MR_URL" review
    - python -m pr_agent.cli --pr_url="$MR_URL" improve
  rules:
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
```

This script will run PR-Agent on every new merge request. You can modify the `rules` section to run PR-Agent on different events.
You can also modify the `script` section to run different PR-Agent commands, or with different parameters by exporting different environment variables.

(2) Add the following masked variables to your GitLab repository (CI/CD -> Variables):

- `GITLAB_PERSONAL_ACCESS_TOKEN`: Your GitLab personal access token.

- `OPENAI_KEY`: Your OpenAI key.

Note that if your base branches are not protected, don't set the variables as `protected`, since the pipeline will not have access to them.

> **Note**: The `$CI_SERVER_FQDN` variable is available starting from GitLab version 16.10. If you're using an earlier version, this variable will not be available. However, you can combine `$CI_SERVER_HOST` and `$CI_SERVER_PORT` to achieve the same result. Please ensure you're using a compatible version or adjust your configuration.

> **Note**: The `gitlab__SSL_VERIFY` environment variable can be used to specify the path to a custom CA certificate bundle for SSL verification. GitLab exposes the `$CI_SERVER_TLS_CA_FILE` variable, which points to the custom CA certificate file configured in your GitLab instance.
> Alternatively, SSL verification can be disabled entirely by setting `gitlab__SSL_VERIFY=false`, although this is not recommended.

## Run a GitLab webhook server

1. In GitLab create a new user and give it "Reporter" role for the intended group or project.

2. For the user from step 1, generate a `personal_access_token` with `api` access.

3. Generate a random secret for your app, and save it for later (`shared_secret`). For example, you can use:

```bash
SHARED_SECRET=$(python -c "import secrets; print(secrets.token_hex(10))")
```

4. Clone this repository:

```bash
git clone https://github.com/qodo-ai/pr-agent.git
```

5. Prepare variables and secrets. Skip this step if you plan on setting these as environment variables when running the agent:
    1. In the configuration file/variables:
        - Set `config.git_provider` to "gitlab"

    2. In the secrets file/variables:
        - Set your AI model key in the respective section
        - In the [gitlab] section, set `personal_access_token` (with token from step 2) and `shared_secret` (with secret from step 3)
        - **Authentication type**: Set `auth_type` to `"private_token"` for older GitLab versions (e.g., 11.x) or private deployments. Default is `"oauth_token"` for gitlab.com and newer versions.

6. Build a Docker image for the app and optionally push it to a Docker repository. We'll use Dockerhub as an example:

```bash
docker build . -t codiumai/pr-agent:gitlab_webhook --target gitlab_webhook -f docker/Dockerfile
docker push codiumai/pr-agent:gitlab_webhook  # Push to your Docker repository
```

7. Set the environment variables; the method depends on your Docker runtime. Skip this step if you included your secrets/configuration directly in the Docker image.

```bash
CONFIG__GIT_PROVIDER=gitlab
GITLAB__PERSONAL_ACCESS_TOKEN=<personal_access_token>
GITLAB__SHARED_SECRET=<shared_secret>
GITLAB__URL=https://gitlab.com
GITLAB__AUTH_TYPE=oauth_token  # Use "private_token" for older GitLab versions
OPENAI__KEY=<your_openai_api_key>
PORT=3000  # Optional: override the webhook server port
```

8. Create a webhook in your GitLab project. Set the URL to `http[s]://<PR_AGENT_HOSTNAME>/webhook`, the secret token to the generated secret from step 3, and enable the triggers `push`, `comments` and `merge request events`.

9. Test your installation by opening a merge request or commenting on a merge request using one of PR Agent's commands.

## Deploy as a Lambda Function

Note that since AWS Lambda env vars cannot have "." in the name, you can replace each "." in an env variable with "__".<br>
For example: `GITLAB.PERSONAL_ACCESS_TOKEN` --> `GITLAB__PERSONAL_ACCESS_TOKEN`

1. Follow steps 1-5 from [Run a GitLab webhook server](#run-a-gitlab-webhook-server).
2. Build a docker image that can be used as a lambda function

    ```shell
    docker buildx build --platform=linux/amd64 . -t codiumai/pr-agent:gitlab_lambda --target gitlab_lambda -f docker/Dockerfile.lambda
    ```

3. Push image to ECR

    ```shell
    docker tag codiumai/pr-agent:gitlab_lambda <AWS_ACCOUNT>.dkr.ecr.<AWS_REGION>.amazonaws.com/codiumai/pr-agent:gitlab_lambda
    docker push <AWS_ACCOUNT>.dkr.ecr.<AWS_REGION>.amazonaws.com/codiumai/pr-agent:gitlab_lambda
    ```

4. Create a lambda function that uses the uploaded image. Set the lambda timeout to be at least 3m.
5. Configure the lambda function to have a Function URL.
6. In the environment variables of the Lambda function, specify `AZURE_DEVOPS_CACHE_DIR` to a writable location such as /tmp. (see [link](https://github.com/qodo-ai/pr-agent/pull/450#issuecomment-1840242269))
7. Go back to steps 8-9 of [Run a GitLab webhook server](#run-a-gitlab-webhook-server) with the function URL as your Webhook URL.
    The Webhook URL would look like `https://<LAMBDA_FUNCTION_URL>/webhook`

### Using AWS Secrets Manager

For production Lambda deployments, use AWS Secrets Manager instead of environment variables:

1. Create individual secrets for each GitLab webhook with this JSON format (e.g., secret name: `project-webhook-secret-001`)

```json
{
  "gitlab_token": "glpat-xxxxxxxxxxxxxxxxxxxxxxxx",
  "token_name": "project-webhook-001"
}
```

2. Create a main configuration secret for common settings (e.g., secret name: `pr-agent-main-config`)

```json
{
  "openai.key": "sk-proj-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
}
```

3. Set these environment variables in your Lambda:

```bash
CONFIG__SECRET_PROVIDER=aws_secrets_manager
AWS_SECRETS_MANAGER__SECRET_ARN=arn:aws:secretsmanager:us-east-1:123456789012:secret:pr-agent-main-config-AbCdEf
```

4. In your GitLab webhook configuration, set the **Secret Token** to the **Secret name** created in step 1:
   - Example: `project-webhook-secret-001`

**Important**: When using Secrets Manager, GitLab's webhook secret must be the Secrets Manager secret name.

5. Add IAM permission `secretsmanager:GetSecretValue` to your Lambda execution role


================================================
FILE: docs/docs/installation/index.md
================================================
# Installation

There are several ways to use PR-Agent:

- [Locally](./locally.md)
- [GitHub integration](./github.md)
- [GitLab integration](./gitlab.md)
- [BitBucket integration](./bitbucket.md)
- [Azure DevOps integration](./azure.md)
- [Gitea integration](./gitea.md)


================================================
FILE: docs/docs/installation/locally.md
================================================
To run PR-Agent locally, you first need to acquire two keys:

1. An OpenAI key from [here](https://platform.openai.com/api-keys){:target="_blank"}, with access to GPT-4 and o4-mini (or a key for other [language models](../usage-guide/changing_a_model.md), if you prefer).
2. A personal access token from your Git platform (GitHub, GitLab, BitBucket, Gitea) with repo scope. GitHub token, for example, can be issued from [here](https://github.com/settings/tokens){:target="_blank"}

## Using Docker image

A list of the relevant tools can be found in the [tools guide](../tools/).

To invoke a tool (for example `review`), you can run PR-Agent directly from the Docker image. Here's how:

- For GitHub:

    ```bash
    docker run --rm -it -e OPENAI.KEY=<your_openai_key> -e GITHUB.USER_TOKEN=<your_github_token> codiumai/pr-agent:latest --pr_url <pr_url> review
    ```

    If you are using GitHub enterprise server, you need to specify the custom url as variable.
    For example, if your GitHub server is at `https://github.mycompany.com`, add the following to the command:

    ```bash
    -e GITHUB.BASE_URL=https://github.mycompany.com/api/v3
    ```

- For GitLab:

    ```bash
    docker run --rm -it -e OPENAI.KEY=<your key> -e CONFIG.GIT_PROVIDER=gitlab -e GITLAB.PERSONAL_ACCESS_TOKEN=<your token> codiumai/pr-agent:latest --pr_url <pr_url> review
    ```

    If you have a dedicated GitLab instance, you need to specify the custom url as variable:

    ```bash
    -e GITLAB.URL=<your gitlab instance url>
    ```

- For BitBucket:

    ```bash
    docker run --rm -it -e CONFIG.GIT_PROVIDER=bitbucket -e OPENAI.KEY=$OPENAI_API_KEY -e BITBUCKET.BEARER_TOKEN=$BITBUCKET_BEARER_TOKEN codiumai/pr-agent:latest --pr_url=<pr_url> review
    ```

- For Gitea:

    ```bash
    docker run --rm -it -e OPENAI.KEY=<your key> -e CONFIG.GIT_PROVIDER=gitea -e GITEA.PERSONAL_ACCESS_TOKEN=<your token> codiumai/pr-agent:latest --pr_url <pr_url> review
    ```

    If you have a dedicated Gitea instance, you need to specify the custom url as variable:

    ```bash
    -e GITEA.URL=<your gitea instance url>
    ```


For other git providers, update `CONFIG.GIT_PROVIDER` accordingly and check the [`pr_agent/settings/.secrets_template.toml`](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/.secrets_template.toml) file for environment variables expected names and values.

### Utilizing environment variables

It is also possible to provide or override the configuration by setting the corresponding environment variables.
You can define the corresponding environment variables by following this convention: `<TABLE>__<KEY>=<VALUE>` or `<TABLE>.<KEY>=<VALUE>`.
The `<TABLE>` refers to a table/section in a configuration file and `<KEY>=<VALUE>` refers to the key/value pair of a setting in the configuration file.

For example, suppose you want to run `pr_agent` that connects to a self-hosted GitLab instance similar to an example above.
You can define the environment variables in a plain text file named `.env` with the following content:

```bash
CONFIG__GIT_PROVIDER="gitlab"
GITLAB__URL="<your url>"
GITLAB__PERSONAL_ACCESS_TOKEN="<your token>"
OPENAI__KEY="<your key>"
```

Then, you can run `pr_agent` using Docker with the following command:

```shell
docker run --rm -it --env-file .env codiumai/pr-agent:latest <tool> <tool parameter>
```

---

### I get an error when running the Docker image. What should I do?

If you encounter an error when running the Docker image, it is almost always due to a misconfiguration of api keys or tokens.

Note that litellm, which is used by pr-agent, sometimes returns non-informative error messages such as `APIError: OpenAIException - Connection error.`
Carefully check the api keys and tokens you provided and make sure they are correct.
Adjustments may be needed depending on your llm provider.

For example, for Azure OpenAI, additional keys are [needed](../usage-guide/changing_a_model.md#azure).
The same goes for other providers; make sure to check the [documentation](../usage-guide/changing_a_model.md#changing-a-model).

## Using pip package

Install the package:

```bash
pip install pr-agent
```

Then run the relevant tool with the script below.
<br>
Make sure to fill in the required parameters (`user_token`, `openai_key`, `pr_url`, `command`):

```python
from pr_agent import cli
from pr_agent.config_loader import get_settings

def main():
    # Fill in the following values
    provider = "github" # github/gitlab/bitbucket/azure_devops
    user_token = "..."  #  user token
    openai_key = "..."  # OpenAI key
    pr_url = "..."      # PR URL, for example 'https://github.com/qodo-ai/pr-agent/pull/809'
    command = "/review" # Command to run (e.g. '/review', '/describe', '/ask="What is the purpose of this PR?"', ...)

    # Setting the configurations
    get_settings().set("CONFIG.git_provider", provider)
    get_settings().set("openai.key", openai_key)
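    # NOTE: this key is GitHub-specific; for another provider, set that provider's token key instead (e.g. "gitlab.personal_access_token")
    get_settings().set("github.user_token", user_token)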

    # Run the command. Feedback will appear in GitHub PR comments
    cli.run_command(pr_url, command)


if __name__ == '__main__':
    main()
```

## Run from source

1. Clone this repository:

```bash
git clone https://github.com/qodo-ai/pr-agent.git
```

2. Navigate to the `/pr-agent` folder and install the requirements in your favorite virtual environment:

```bash
pip install -e .
```

*Note: If you get an error related to Rust in the dependency installation then make sure Rust is installed and in your `PATH`, instructions: https://rustup.rs*

3. Copy the secrets template file and fill in your OpenAI key and your GitHub user token:

```bash
cp pr_agent/settings/.secrets_template.toml pr_agent/settings/.secrets.toml
chmod 600 pr_agent/settings/.secrets.toml
# Edit .secrets.toml file
```

4. Run the cli.py script:

```bash
python3 -m pr_agent.cli --pr_url <pr_url> review
python3 -m pr_agent.cli --pr_url <pr_url> ask <your question>
python3 -m pr_agent.cli --pr_url <pr_url> describe
python3 -m pr_agent.cli --pr_url <pr_url> improve
python3 -m pr_agent.cli --pr_url <pr_url> add_docs
python3 -m pr_agent.cli --pr_url <pr_url> generate_labels
python3 -m pr_agent.cli --issue_url <issue_url> similar_issue
...
```

[Optional] Add the pr_agent folder to your PYTHONPATH

```bash
export PYTHONPATH=$PYTHONPATH:<PATH to pr_agent folder>
```


================================================
FILE: docs/docs/installation/pr_agent.md
================================================
# PR-Agent Installation Guide

PR-Agent can be deployed in various environments and platforms. Choose the installation method that best suits your needs:

## 🖥️ Local Installation

Learn how to run PR-Agent locally using:

- Docker image
- pip package
- CLI from source code

[View Local Installation Guide →](./locally.md)

## 🐙 GitHub Integration

Set up PR-Agent with GitHub as:

- GitHub Action
- Local GitHub App

[View GitHub Integration Guide →](./github.md)

## 🦊 GitLab Integration

Deploy PR-Agent on GitLab as:

- GitLab pipeline job
- Local GitLab webhook server

[View GitLab Integration Guide →](./gitlab.md)

## 🟦 BitBucket Integration

Implement PR-Agent in BitBucket as:

- BitBucket pipeline job
- Local BitBucket server

[View BitBucket Integration Guide →](./bitbucket.md)

## 🔷  Azure DevOps Integration

Configure PR-Agent with Azure DevOps as:

- Azure DevOps pipeline job
- Local Azure DevOps webhook

[View Azure DevOps Integration Guide →](./azure.md)


================================================
FILE: docs/docs/overview/data_privacy.md
================================================
## Self-hosted PR-Agent

- If you self-host PR-Agent with your own OpenAI (or other LLM provider) API key, data privacy is a matter between you and that provider.


================================================
FILE: docs/docs/summary.md
================================================
# Table of contents

* [Overview](index.md)
  * [Data Privacy](overview/data_privacy.md)

## Installation

* [Installation](installation/index.md)
* [PR-Agent](installation/pr_agent.md)

## Usage Guide

* [Usage Guide](usage-guide/index.md)
* [Introduction](usage-guide/introduction.md)
* [Configuration File](usage-guide/configuration_options.md)
* [Usage and Automation](usage-guide/automations_and_usage.md)
* [Managing Mail Notifications](usage-guide/mail_notifications.md)
* [Changing a Model](usage-guide/changing_a_model.md)
* [Additional Configurations](usage-guide/additional_configurations.md)
* [Frequently Asked Questions](faq/index.md)

## Tools

* [Tools](tools/index.md)
* [Describe](tools/describe.md)
* [Review](tools/review.md)
* [Improve](tools/improve.md)
* [Ask](tools/ask.md)
* [Add Docs](tools/add_docs.md)
* [Generate Labels](tools/generate_labels.md)
* [Similar Issues](tools/similar_issues.md)
* [Help](tools/help.md)
* [Help Docs](tools/help_docs.md)
* [Update Changelog](tools/update_changelog.md)

## Core Abilities

* [Core Abilities](core-abilities/index.md)
* [Chat on code suggestions](core-abilities/interactivity.md)
* [Compression strategy](core-abilities/compression_strategy.md)
* [Dynamic context](core-abilities/dynamic_context.md)
* [Fetching ticket context](core-abilities/fetching_ticket_context.md)
* [Interactivity](core-abilities/interactivity.md)
* [Local and global metadata](core-abilities/metadata.md)
* [Self-reflection](core-abilities/self_reflection.md)


================================================
FILE: docs/docs/tools/add_docs.md
================================================
## Overview

The `add_docs` tool scans the PR code changes and suggests documentation for any code components that are missing documentation, such as functions, classes, and methods.

It can be invoked manually by commenting on any PR:

```
/add_docs
```

## Example usage

Invoke the tool manually by commenting `/add_docs` on any PR:

![Add Docs](https://codium.ai/images/pr_agent/add_docs_comment.png){width=512}

The tool will generate documentation suggestions as inline code suggestions:

![Add Docs Result](https://codium.ai/images/pr_agent/add_docs_result.png){width=512}

### Language-specific documentation styles

The tool automatically detects the programming language and generates documentation in the appropriate format:

| Language | Documentation Format |
|----------|---------------------|
| Python | Docstrings (Sphinx, Google, Numpy styles) |
| Java | Javadocs |
| JavaScript/TypeScript | JSdocs |
| C++ | Doxygen |
| Other | Generic documentation |

## Configuration options

Under the section `[pr_add_docs]`, the following options are available:

| Option | Type | Default | Description |
|--------|------|---------|-------------|
| `extra_instructions` | string | `""` | Additional instructions for the AI model |
| `docs_style` | string | `"Sphinx"` | Documentation style for Python. Options: `"Sphinx"`, `"Google Style with Args, Returns, Attributes...etc"`, `"Numpy Style"`, `"PEP257"`, `"reStructuredText"` |
| `file` | string | `""` | Specific file to document (useful when multiple components have the same name) |
| `class_name` | string | `""` | Specific class name to target (useful when methods have the same name in the same file) |

### Example configuration

To customize the documentation style, add the following to your configuration file:

```toml
[pr_add_docs]
docs_style = "Google Style with Args, Returns, Attributes...etc"
extra_instructions = "Focus on documenting public methods and include usage examples"
```

### Command line options

You can pass configuration options directly in the command:

```
/add_docs --pr_add_docs.docs_style="Numpy Style"
```

## How it works

1. The tool analyzes the PR diff to identify code components (functions, classes, methods) that lack documentation
2. It uses AI to generate appropriate documentation based on the code context and language
3. Documentation suggestions are published as inline code suggestions that can be applied with a single click


================================================
FILE: docs/docs/tools/ask.md
================================================
## Overview

The `ask` tool answers questions about the PR, based on the PR code changes. Make sure to be specific and clear in your questions.
It can be invoked manually by commenting on any PR:

```
/ask "..."
```

## Example usage

![Ask Comment](https://codium.ai/images/pr_agent/ask_comment.png){width=512}

![Ask](https://codium.ai/images/pr_agent/ask.png){width=512}

## Ask lines

You can run `/ask` on specific lines of code in the PR from the PR's diff view. The tool will answer questions based on the code changes in the selected lines.

- Click on the '+' sign next to the line number to select the line.
- To select multiple lines, click on the '+' sign of the first line and then hold and drag to select the rest of the lines.
- Write `/ask "..."` in the comment box and press the `Add single comment` button.

![Ask Line](https://codium.ai/images/pr_agent/Ask_line.png){width=512}

Note that the tool does not have "memory" of previous questions, and answers each question independently.

## Ask on images

You can also ask questions about images that appear in the comment, where the entire PR code will be used as context.
<br>
The basic syntax is:

```
/ask "..."

[Image](https://real_link_to_image)
```

where `https://real_link_to_image` is the direct link to the image.

Note that GitHub has a built-in mechanism for pasting images in comments. However, a pasted image does not provide a direct link.
To get a direct link to an image, we recommend using the following scheme:

1\. First, post a comment that contains **only** the image:

![Ask image1](https://codium.ai/images/pr_agent/ask_images1.png){width=512}

2\. Quote reply to that comment:

![Ask image2](https://codium.ai/images/pr_agent/ask_images2.png){width=512}

3\. In the screen opened, type the question below the image:

![Ask image3](https://codium.ai/images/pr_agent/ask_images3.png){width=512}
![Ask image4](https://codium.ai/images/pr_agent/ask_images4.png){width=512}

4\. Post the comment, and receive the answer:

![Ask image5](https://codium.ai/images/pr_agent/ask_images5.png){width=512}

See a full video tutorial [here](https://codium.ai/images/pr_agent/ask_image_video.mov)


================================================
FILE: docs/docs/tools/describe.md
================================================
## Overview

The `describe` tool scans the PR code changes, and generates a description for the PR - title, type, summary, walkthrough and labels.

The tool can be triggered automatically every time a new PR is [opened](../usage-guide/automations_and_usage.md#github-app-automatic-tools-when-a-new-pr-is-opened), or it can be invoked manually by commenting on any PR:

```
/describe
```

## Example usage

### Manual triggering

Invoke the tool manually by commenting `/describe` on any PR:

![Describe comment](https://codium.ai/images/pr_agent/describe_comment.png){width=512}

After ~30 seconds, the tool will generate a description for the PR:

![Describe New](https://codium.ai/images/pr_agent/describe_new.png){width=512}

If you want to edit [configurations](#configuration-options), add the relevant ones to the command:

```
/describe --pr_description.some_config1=... --pr_description.some_config2=...
```

### Automatic triggering

To run the `describe` automatically when a PR is opened, define in a [configuration file](../usage-guide/configuration_options.md#wiki-configuration-file):

```
[github_app]
pr_commands = [
    "/describe",
    ...
]

[pr_description]
publish_labels = true
...
```

- The `pr_commands` lists commands that will be executed automatically when a PR is opened.
- The `[pr_description]` section contains the configurations for the `describe` tool you want to edit (if any).

## Preserving the original user description

By default, PR-Agent tries to preserve your original PR description by placing it above the generated content.
This requires including your description during the initial PR creation.

"PR-Agent removed the original description from the PR. Why?"

From our experience, there are two possible reasons:

- If you edit the description _while_ the automated tool is running, a race condition may occur, potentially causing your original description to be lost. Hence, create a description before launching the PR.

- When _updating_ PR descriptions, the `/describe` tool considers everything above the "PR Type" field as user content and will preserve it.
Everything below this marker is treated as previously auto-generated content and will be replaced.

![Describe comment](https://codium.ai/images/pr_agent/pr_description_user_description.png){width=512}

## Sequence Diagram Support 
The `/describe` tool includes a Mermaid sequence diagram showing component/function interactions. 

This option is enabled by default via the `pr_description.enable_pr_diagram` param.


[//]: # (### How to enable\disable)

[//]: # ()
[//]: # (In your configuration:)

[//]: # ()
[//]: # (```)

[//]: # (toml)

[//]: # ([pr_description])

[//]: # (enable_pr_diagram = true)

[//]: # (```)

## Configuration options

???+ example "Possible configurations"

    <table>
      <tr>
        <td><b>publish_labels</b></td>
        <td>If set to true, the tool will publish labels to the PR. Default is false.</td>
      </tr>
      <tr>
        <td><b>publish_description_as_comment</b></td>
        <td>If set to true, the tool will publish the description as a comment to the PR. If false, it will overwrite the original description. Default is false.</td>
      </tr>
      <tr>
        <td><b>publish_description_as_comment_persistent</b></td>
        <td>If set to true and `publish_description_as_comment` is true, the tool will publish the description as a persistent comment to the PR. Default is true.</td>
      </tr>
      <tr>
        <td><b>add_original_user_description</b></td>
        <td>If set to true, the tool will add the original user description to the generated description. Default is true.</td>
      </tr>
      <tr>
        <td><b>generate_ai_title</b></td>
        <td>If set to true, the tool will also generate an AI title for the PR. Default is false.</td>
      </tr>
      <tr>
        <td><b>extra_instructions</b></td>
        <td>Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ..."</td>
      </tr>
      <tr>
        <td><b>enable_pr_type</b></td>
        <td>If set to false, it will not show the `PR type` as a text value in the description content. Default is true.</td>
      </tr>
      <tr>
        <td><b>final_update_message</b></td>
        <td>If set to true, it will add a comment message [`PR Description updated to latest commit...`](https://github.com/qodo-ai/pr-agent/pull/499#issuecomment-1837412176) after finishing calling `/describe`. Default is true.</td>
      </tr>
      <tr>
        <td><b>enable_semantic_files_types</b></td>
        <td>If set to true, "Changes walkthrough" section will be generated. Default is true.</td>
      </tr>
      <tr>
            <td><b>file_table_collapsible_open_by_default</b></td>
            <td>If set to true, the file list in the "Changes walkthrough" section will be open by default. If set to false, it will be closed by default. Default is false.</td>
      </tr>
      <tr>
        <td><b>collapsible_file_list</b></td>
        <td>If set to true, the file list in the "Changes walkthrough" section will be collapsible. If set to "adaptive", the file list will be collapsible only if there are more than 8 files. Default is "adaptive".</td>
      </tr>
      <tr>
        <td><b>enable_large_pr_handling</b></td>
        <td>If set to true, in case of a large PR the tool will make several calls to the AI and combine them to be able to cover more files. Default is true.</td>
      </tr>
      <tr>
        <td><b>enable_help_text</b></td>
        <td>If set to true, the tool will display a help text in the comment. Default is false.</td>
      </tr>
      <tr>
        <td><b>enable_pr_diagram</b></td>
        <td>If set to true, the tool will generate a horizontal Mermaid flowchart summarizing the main pull request changes. This field remains empty if not applicable. Default is true.</td>
      </tr>
      <tr>
        <td><b>auto_create_ticket</b></td>
        <td>If set to true, this will automatically create a ticket in the ticketing system when a PR is opened. Default is false.</td>
      </tr>
    </table>

## Markers template

To enable markers, set `pr_description.use_description_markers=true`.
Markers make it easy to combine user content with auto-generated content, using a template-like mechanism.

For example, if the PR original description was:

```
User content...

## PR Type:
pr_agent:type

## PR Description:
pr_agent:summary

## PR Walkthrough:
pr_agent:walkthrough

## PR Diagram:
pr_agent:diagram
```

The marker `pr_agent:type` will be replaced with the PR type, `pr_agent:summary` will be replaced with the PR summary, `pr_agent:walkthrough` will be replaced with the PR walkthrough, and `pr_agent:diagram` will be replaced with the sequence diagram (if enabled).

![Describe markers before](https://codium.ai/images/pr_agent/describe_markers_before.png){width=512}

becomes

![Describe markers after](https://codium.ai/images/pr_agent/describe_markers_after.png){width=512}

**Configuration params**:

- `use_description_markers`: if set to true, the tool will use markers template. It replaces every marker of the form `pr_agent:marker_name` with the relevant content. Default is false.
- `include_generated_by_header`: if set to true, the tool will add a dedicated header: 'Generated by PR Agent at ...' to any automatic content. Default is true.
- `diagram`: if present as a marker, will be replaced by the PR sequence diagram (if enabled).

## Custom labels

The default labels of the describe tool are quite generic, since they are meant to be used in any repo: [`Bug fix`, `Tests`, `Enhancement`, `Documentation`, `Other`].

You can define custom labels that are relevant for your repo and use cases.
Custom labels can be defined in a configuration file, or directly in the repo's [labels page](#handle-custom-labels-from-the-repos-labels-page).

Make sure to provide proper title, and a detailed and well-phrased description for each label, so the tool will know when to suggest it.
Each label description should be a **conditional statement** that indicates whether or not to add the label to the PR, according to the PR content.

???+ tip "Auto-remove custom label when no longer relevant"
    If the custom label is no longer relevant, it will be automatically removed from the PR by running the `generate_labels` tool or the `describe` tool.


### Handle custom labels from a configuration file

Example for a custom labels configuration setup in a configuration file:

```
[config]
enable_custom_labels=true


[custom_labels."sql_changes"]
description = "Use when a PR contains changes to SQL queries"

[custom_labels."test"]
description = "use when a PR primarily contains new tests"

...
```

### Handle custom labels from the Repo's labels page

You can also control the custom labels that will be suggested by the `describe` tool from the repo's labels page:

- GitHub : go to `https://github.com/{owner}/{repo}/labels` (or click on the "Labels" tab in the issues or PRs page)
- GitLab : go to `https://gitlab.com/{owner}/{repo}/-/labels` (or click on "Manage" -> "Labels" on the left menu)

Now add/edit the custom labels. They should be formatted as follows:

- Label name: The name of the custom label.
- Description: Start the description with the prefix `pr_agent:`, for example: `pr_agent: Description of when AI should suggest this label`.<br>

Examples for custom labels:

- `Main topic:performance` -  pr_agent:The main topic of this PR is performance
- `New endpoint` -  pr_agent:A new endpoint was added in this PR
- `SQL query` -  pr_agent:A new SQL query was added in this PR
- `Dockerfile changes` - pr_agent:The PR contains changes in the Dockerfile
- ...

The description should be comprehensive and detailed, indicating when to add the desired label. For example:
![Add native custom labels](https://codium.ai/images/pr_agent/add_native_custom_labels.png){width=768}

## Usage Tips

!!! tip "Automation"
    - When you first install PR-Agent app, the [default mode](../usage-guide/automations_and_usage.md#github-app) for the describe tool is:
    ```
    pr_commands = ["/describe", ...]
    ```
    meaning the `describe` tool will run automatically on every PR, with the default configurations.

- Markers are an alternative way to control the generated description, to give maximal control to the user. If you set:

    ```
    pr_commands = ["/describe --pr_description.use_description_markers=true", ...]
    ```

    the tool will replace every marker of the form `pr_agent:marker_name` in the PR description with the relevant content, where `marker_name` is one of the following:

    * `type`: the PR type.
    * `summary`: the PR summary.
    * `walkthrough`: the PR walkthrough.

- Note that when markers are enabled, if the original PR description does not contain any markers, the tool will not alter the description at all.


================================================
FILE: docs/docs/tools/generate_labels.md
================================================
## Overview

The `generate_labels` tool scans the PR code changes and generates custom labels for the PR based on the content and context of the changes.

It can be invoked manually by commenting on any PR:

```
/generate_labels
```

## Example usage

Invoke the tool manually by commenting `/generate_labels` on any PR:

![Generate Labels](https://codium.ai/images/pr_agent/generate_labels_comment.png){width=512}

The tool will analyze the PR and add appropriate labels:

![Generate Labels Result](https://codium.ai/images/pr_agent/generate_labels_result.png){width=512}

## Configuration options

The `generate_labels` tool uses configurations from the `[pr_description]` section for custom labels.

### Enabling custom labels

To use custom labels, you need to enable them in the configuration:

```toml
[config]
enable_custom_labels = true
```

### Defining custom labels

You can define your own custom labels in the `[custom_labels]` section. See the [custom_labels.toml](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/custom_labels.toml) file for examples.

Example configuration:

```toml
[custom_labels."Bug fix"]
description = "A fix for a bug in the codebase"

[custom_labels."Feature"]
description = "A new feature or enhancement"

[custom_labels."Documentation"]
description = "Documentation changes only"

[custom_labels."Tests"]
description = "Adding or modifying tests"

[custom_labels."Refactoring"]
description = "Code refactoring without functional changes"
```

### How labels are applied

1. The tool analyzes the PR diff and commit messages
2. It uses AI to determine which labels best match the PR content
3. Labels are automatically applied to the PR (if the git provider supports it)
4. If labels cannot be applied directly, they are published as a comment

## Comparison with `/describe` labels

The `/describe` tool also generates labels as part of its output. The key differences are:

| Feature | `/generate_labels` | `/describe` |
|---------|-------------------|-------------|
| Purpose | Dedicated label generation | Full PR description with labels |
| Output | Labels only | Title, summary, walkthrough, and labels |
| Custom labels | ✅ Supported | ✅ Supported |
| Use case | When you only need labels | When you want a complete PR description |

## Tips

- Use custom labels that match your team's workflow and labeling conventions
- Combine with automation to automatically label PRs when they are opened
- Review the generated labels and adjust custom label descriptions if the AI consistently misclassifies PRs


================================================
FILE: docs/docs/tools/help.md
================================================
## Overview

The `help` tool provides a list of all the available tools and their descriptions.
For PR-Agent users, it also lets you trigger each tool by checking the relevant box.

It can be invoked manually by commenting on any PR:

```
/help
```

## Example usage

Invoke the `help` tool by commenting on a PR with:

![Help tool input](https://codium.ai/images/pr_agent/help1.png){width=750}


Response will include a list of available tools:

![Help tool output](https://codium.ai/images/pr_agent/help2.png){width=750}


================================================
FILE: docs/docs/tools/help_docs.md
================================================
## Overview

The `help_docs` tool can answer a free-text question based on a git documentation folder.

It can be invoked manually by commenting on any PR or Issue:

```
/help_docs "..."
```

It can also be configured to be triggered automatically when a [new issue is opened](#run-automatically-when-a-new-issue-is-opened).

The tool assumes by default that the documentation is located in the root of the repository, at `/docs` folder.
However, this can be customized by setting the `docs_path` configuration option:

```toml
[pr_help_docs]
repo_url = ""                 # The repository to use as context
docs_path = "docs"            # The documentation folder
repo_default_branch = "main"  # The branch to use in case repo_url overwritten

```

See more configuration options in the [Configuration options](#configuration-options) section.

## Example usage

[//]: # (#### Asking a question about this repository:)

[//]: # (![help_docs on the documentation of this repository]&#40;https://codium.ai/images/pr_agent/help_docs_comment.png&#41;{width=512})

**Asking a question about another repository**

![help_docs on the documentation of another repository](https://codium.ai/images/pr_agent/help_docs_comment_explicit_git.png){width=512}

**Response**:

![help_docs response](https://codium.ai/images/pr_agent/help_docs_response.png){width=512}

## Run automatically when a new issue is opened

You can configure PR-Agent to run `help_docs` automatically on any newly created issue.
This can be useful, for example, for providing immediate feedback to users who open issues with questions on open-source projects with extensive documentation.

Here's how:

1) Follow the steps depicted under [Run as a GitHub Action](../installation/github.md#run-as-a-github-action) to create a new workflow, such as `.github/workflows/help_docs.yml`.

2) Edit your yaml file to the following:

```yaml
name: Run pr agent on every opened issue, respond to user comments on an issue

#When the action is triggered
on:
  issues:
    types: [opened] #New issue

# Read env. variables
env:
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  GITHUB_API_URL: ${{ github.api_url }}
  GIT_REPO_URL: ${{ github.event.repository.clone_url }}
  ISSUE_URL: ${{ github.event.issue.html_url || github.event.comment.html_url }}
  ISSUE_BODY: ${{ github.event.issue.body || github.event.comment.body }}
  OPENAI_KEY: ${{ secrets.OPENAI_KEY }}

# The actual set of actions
jobs:
  issue_agent:
    runs-on: ubuntu-latest
    if: ${{ github.event.sender.type != 'Bot' }} #Do not respond to bots

    # Set required permissions
    permissions:
      contents: read    # For reading repository contents
      issues: write     # For commenting on issues

    steps:
      - name: Run PR Agent on Issues
        if: ${{ env.ISSUE_URL != '' }}
        uses: docker://codiumai/pr-agent:latest
        with:
          entrypoint: /bin/bash #Replace invoking cli.py directly with a shell
          args: |
            -c "cd /app && \
            echo 'Running Issue Agent action step on ISSUE_URL=$ISSUE_URL' && \
            export config__git_provider='github' && \
                        export github__user_token=$GITHUB_TOKEN && \
            export github__base_url=$GITHUB_API_URL && \
            export openai__key=$OPENAI_KEY && \
            python -m pr_agent.cli --issue_url=$ISSUE_URL --pr_help_docs.repo_url="..." --pr_help_docs.docs_path="..." --pr_help_docs.openai_key=$OPENAI_KEY && \
            help_docs "$ISSUE_BODY"
```

3) Following completion of the remaining steps (such as adding secrets and relevant configurations, such as `repo_url` and `docs_path`) merge this change to your main branch.
When a new issue is opened, you should see a comment from `github-actions` bot with an auto response, assuming the question is related to the documentation of the repository.

---

## Configuration options

Under the section `pr_help_docs`, the [configuration file](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L199) contains options to customize the 'help docs' tool:

- `repo_url`: If not overridden, the repo the context came from (issue or PR) is used; otherwise, the given repo is used as context.
- `repo_default_branch`: The branch to use when `repo_url` is overridden; otherwise, it has no effect.
- `docs_path`: Relative path from root of repository (either the one this PR has been issued for, or above repo url).
- `exclude_root_readme`:  Whether or not to exclude the root README file for querying the model.
- `supported_doc_exts` : Which file extensions should be included for the purpose of querying the model.

---


================================================
FILE: docs/docs/tools/improve.md
================================================
## Overview

The `improve` tool scans the PR code changes, and automatically generates meaningful suggestions for improving the PR code.
The tool can be triggered automatically every time a new PR is [opened](../usage-guide/automations_and_usage.md#github-app-automatic-tools-when-a-new-pr-is-opened), or it can be invoked manually by commenting on any PR:

```toml
/improve
```

## How it looks

=== "Suggestions Overview"
    ![code_suggestions_as_comment_closed](https://codium.ai/images/pr_agent/code_suggestions_as_comment_closed.png){width=512}

=== "Selecting a specific suggestion"
    ![code_suggestions_as_comment_open](https://codium.ai/images/pr_agent/code_suggestions_as_comment_open.png){width=512}

___

## Example usage

### Manual triggering

Invoke the tool manually by commenting `/improve` on any PR. The code suggestions by default are presented as a single comment:

To edit [configurations](#configuration-options) related to the `improve` tool, use the following template:

```toml
/improve --pr_code_suggestions.some_config1=... --pr_code_suggestions.some_config2=...
```

For example, you can choose to present all the suggestions as committable code comments, by running the following command:

```toml
/improve --pr_code_suggestions.commitable_code_suggestions=true
```

![improve](https://codium.ai/images/pr_agent/improve.png){width=512}

### Automatic triggering

To run the `improve` automatically when a PR is opened, define in a [configuration file](../usage-guide/configuration_options.md#wiki-configuration-file):

```toml
[github_app]
pr_commands = [
    "/improve",
    ...
]

[pr_code_suggestions]
num_code_suggestions_per_chunk = ...
...
```

- The `pr_commands` lists commands that will be executed automatically when a PR is opened.
- The `[pr_code_suggestions]` section contains the configurations for the `improve` tool you want to edit (if any)

### Table vs Committable code comments

PR-Agent supports two modes for presenting code suggestions: 

1) [Table](https://codium.ai/images/pr_agent/code_suggestions_as_comment_closed.png) mode 

2) [Inline Committable](https://codium.ai/images/pr_agent/improve.png) code comments mode.

The table format offers several key advantages:

- **Reduced noise**: Creates a cleaner PR experience with less clutter
- **Quick overview and prioritization**: Enables quick review of one-liner summaries, impact levels, and easy prioritization
- **High-level suggestions**: High-level suggestions that aren't tied to specific code chunks are presented only in the table mode
- **Interactive features**: Provides 'more' and 'update' functionality via clickable buttons
- **Centralized tracking**: Shows suggestion implementation status in one place
- **IDE integration**: Allows applying suggestions directly in your IDE via the CLI tool

Table mode is the default in PR-Agent, and is the recommended approach for most users due to these benefits.

![code_suggestions_as_comment_closed.png](https://codium.ai/images/pr_agent/code_suggestions_as_comment_closed.png){width=512}

Teams with specific preferences can enable committable code comments mode in their local configuration, or use [dual publishing mode](#dual-publishing-mode).

> `Note - due to platform limitations, Bitbucket cloud and server support only committable code comments mode.`


## `Extra instructions` and `best practices`

The `improve` tool can be further customized by providing additional instructions and best practices to the AI model.

### Extra instructions

You can use the `extra_instructions` configuration option to give the AI model additional instructions for the `improve` tool.
Be specific, clear, and concise in the instructions. With extra instructions, you are the prompter.

Examples for possible instructions:

```toml
[pr_code_suggestions]
extra_instructions="""\
(1) Answer in Japanese
(2) Don't suggest to add try-except block
(3) Ignore changes in toml files
...
"""
```

Use triple quotes to write multi-line instructions. Use bullet points or numbers to make the instructions more readable.

### Best practices

`Platforms supported: GitHub, GitLab, Bitbucket`

PR-Agent supports both simple and hierarchical best practices configurations to provide guidance to the AI model for generating relevant code suggestions.

???- tip "Writing effective best practices files"
    
    The following guidelines apply to all best practices files:
    
    - Write clearly and concisely
    - Include brief code examples when helpful with before/after patterns
    - Focus on project-specific guidelines that will result in relevant suggestions you actually want to get
    - Keep each file relatively short, under 800 lines, since:
        - AI models may not effectively process very long documents
        - Long files tend to contain generic guidelines already known to AI
        - The maximum accumulated content across multiple files is limited to 2000 lines.
    - Use pattern-based structure rather than simple bullet points for better clarity

???- tip "Example of a best practices file"
 
    Pattern 1: Add proper error handling with try-except blocks around external function calls.
    
    Example code before:

    ```python
    # Some code that might raise an exception
    return process_pr_data(data)
    ```

    Example code after:

    ```python
    try:
        # Some code that might raise an exception
        return process_pr_data(data)
    except Exception as e:
        logger.exception("Failed to process request", extra={"error": e})
    ```

    Pattern 2: Add defensive null/empty checks before accessing object properties or performing operations on potentially null variables to prevent runtime errors.
    
    Example code before:

    ```python
    def get_pr_code(pr_data):
        if "changed_code" in pr_data:
            return pr_data.get("changed_code", "")
        return ""
    ```

    Example code after:

    ```python
    def get_pr_code(pr_data):
        if pr_data is None:
            return ""
        if "changed_code" in pr_data:
            return pr_data.get("changed_code", "")
        return ""
    ```

#### Local best practices

For basic usage, create a `best_practices.md` file in your repository's root directory containing a list of best practices, coding standards, and guidelines specific to your repository.

The AI model will use this `best_practices.md` file as a reference, and in case the PR code violates any of the guidelines, it will create additional suggestions, with a dedicated label: `Organization best practice`.

### Combining 'extra instructions' and 'best practices'

The `extra instructions` configuration is more related to the `improve` tool prompt. It can be used, for example, to avoid specific suggestions ("Don't suggest to add try-except block", "Ignore changes in toml files", ...) or to emphasize specific aspects or formats ("Answer in Japanese", "Give only short suggestions", ...)

In contrast, the `best_practices.md` file is a general guideline for the way code should be written in the repo.

Using a combination of both can help the AI model to provide relevant and tailored suggestions.

## Usage Tips

### Implementing the proposed code suggestions

Each generated suggestion consists of three key elements:

1. A single-line summary of the proposed change
2. An expandable section containing a comprehensive description of the suggestion
3. A diff snippet showing the recommended code modification (before and after)

We advise users to apply critical analysis and judgment when implementing the proposed suggestions.
In addition to mistakes (which may happen, but are rare), sometimes the presented code modification may serve more as an _illustrative example_ than a directly applicable solution.
In such cases, we recommend prioritizing the suggestion's detailed description, using the diff snippet primarily as a supporting reference.

### Dual publishing mode

Our recommended approach for presenting code suggestions is through a [table](./improve.md#overview) (`--pr_code_suggestions.commitable_code_suggestions=false`).
This method significantly reduces the PR footprint and allows for quick and easy digestion of multiple suggestions.

We also offer a complementary **dual publishing mode**. When enabled, suggestions exceeding a certain score threshold are not only displayed in the table, but also presented as committable PR comments.
This mode helps highlight suggestions deemed more critical.

To activate dual publishing mode, use the following setting:

```toml
[pr_code_suggestions]
dual_publishing_score_threshold = x
```

Where x represents the minimum score threshold (>=) for suggestions to be presented as committable PR comments in addition to the table. Default is -1 (disabled).

### Self-review

`Platforms supported: GitHub, GitLab`

If you set in a configuration file:

```toml
[pr_code_suggestions]
demand_code_suggestions_self_review = true
```

The `improve` tool will add a checkbox below the suggestions, prompting the user to acknowledge that they have reviewed the suggestions.
You can set the content of the checkbox text via:

```toml
[pr_code_suggestions]
code_suggestions_self_review_text = "... (your text here) ..."
```

![self_review_1](https://codium.ai/images/pr_agent/self_review_1.png){width=512}

!!! tip "Tip - Reducing visual footprint after self-review"

    The configuration parameter `pr_code_suggestions.fold_suggestions_on_self_review` (default is True)
    can be used to automatically fold the suggestions after the user clicks the self-review checkbox.

    This reduces the visual footprint of the suggestions, and also indicates to the PR reviewer that the suggestions have been reviewed by the PR author, and don't require further attention.

!!! tip "Tip - Demanding self-review from the PR author"

    By setting:
    ```toml
    [pr_code_suggestions]
    approve_pr_on_self_review = true
    ```
    the tool can automatically add an approval when the PR author clicks the self-review checkbox.


    - If you set the number of required reviewers for a PR to 2, this effectively means that the PR author must click the self-review checkbox before the PR can be merged (in addition to a human reviewer).

    ![self_review_2](https://codium.ai/images/pr_agent/self_review_2.png){width=512}

    - If you keep the number of required reviewers for a PR at 1 and enable this configuration, this effectively means that the PR author can approve the PR by actively clicking the self-review checkbox.

        To prevent unauthorized approvals, this configuration defaults to false, and cannot be altered through online comments; enabling requires a direct update to the configuration file and a commit to the repository. This ensures that utilizing the feature demands a deliberate documented decision by the repository owner.


### How many code suggestions are generated?

PR-Agent uses a dynamic strategy to generate code suggestions based on the size of the pull request (PR). Here's how it works:

#### 1. Chunking large PRs

- PR-Agent divides large PRs into 'chunks'.
- Each chunk contains up to `config.max_model_tokens` tokens (default: 32,000).

#### 2. Generating suggestions

- For each chunk, PR-Agent generates up to `pr_code_suggestions.num_code_suggestions_per_chunk` suggestions (default: 3).

This approach has two main benefits:

- Scalability: The number of suggestions scales with the PR size, rather than being fixed.
- Quality: By processing smaller chunks, the AI can maintain higher quality suggestions, as larger contexts tend to decrease AI performance.

Note: Chunking is primarily relevant for large PRs. For most PRs (up to 600 lines of code), PR-Agent will be able to process the entire code in a single call.

## Configuration options

???+ example "General options"

    <table>
      <tr>
        <td><b>extra_instructions</b></td>
        <td>Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ...".</td>
      </tr>
      <tr>
        <td><b>commitable_code_suggestions</b></td>
        <td>If set to true, the tool will display the suggestions as committable code comments. Default is false.</td>
      </tr>
      <tr>
        <td><b>dual_publishing_score_threshold</b></td>
        <td>Minimum score threshold for suggestions to be presented as committable PR comments in addition to the table. Default is -1 (disabled).</td>
      </tr>
      <tr>
        <td><b>focus_only_on_problems</b></td>
        <td>If set to true, suggestions will focus primarily on identifying and fixing code problems, and less on style considerations like best practices, maintainability, or readability. Default is true.</td> 
      </tr>
      <tr>
        <td><b>persistent_comment</b></td>
        <td>If set to true, the improve comment will be persistent, meaning that every new improve request will edit the previous one. Default is true.</td>
      </tr>
      <tr>
        <td><b>suggestions_score_threshold</b></td>
        <td>Any suggestion with an importance score lower than this threshold will be removed. Default is 0. It is highly recommended not to set this value above 7-8, since a higher threshold may clip relevant suggestions that can be useful.</td>
      </tr>
      <tr>
        <td><b>enable_help_text</b></td>
        <td>If set to true, the tool will display a help text in the comment. Default is false.</td>
      </tr>
      <tr>
        <td><b>enable_chat_text</b></td>
        <td>If set to true, the tool will display a reference to the PR chat in the comment. Default is false.</td>
      </tr>
      <tr>
        <td><b>publish_output_no_suggestions</b></td>
        <td>If set to true, the tool will publish a comment even if no suggestions were found. Default is true.</td>
      </tr>
    </table>

???+ example "Params for number of suggestions and AI calls"

    <table>
      <tr>
        <td><b>num_code_suggestions_per_chunk</b></td>
        <td>Number of code suggestions provided by the 'improve' tool, per chunk. Default is 3.</td>
      </tr>
      <tr>
        <td><b>max_number_of_calls</b></td>
        <td>Maximum number of chunks. Default is 3.</td>
      </tr>
    </table>

## Understanding AI Code Suggestions

- **AI Limitations:** AI models for code are getting better and better, but they are not flawless. Not all the suggestions will be perfect, and a user should not accept all of them automatically. Critical reading and judgment are required. Mistakes of the AI are rare but can happen, and it is usually quite easy for a human to spot them.
- **Purpose of Suggestions:**
    - **Self-reflection:** The suggestions aim to enable developers to _self-reflect_ and improve their pull requests. This process can help to identify blind spots, uncover missed edge cases, and enhance code readability and coherency. Even when a specific code suggestion isn't suitable, the underlying issue it highlights often reveals something important that might deserve attention.
    - **Bug detection:** The suggestions also alert on any _critical bugs_ that may have been identified during the analysis. This provides an additional safety net to catch potential issues before they make it into production. It's perfectly acceptable to implement only the suggestions you find valuable for your specific context.
- **Hierarchy:** Presenting the suggestions in a structured hierarchical table enables the user to _quickly_ understand them, and to decide which ones are relevant and which are not.
- **Customization:** To guide the model to suggestions that are more relevant to the specific needs of your project, we recommend using the [`extra_instructions`](./improve.md#extra-instructions-and-best-practices) and [`best practices`](./improve.md#best-practices) fields.
- **Model Selection:** For specific programming languages or use cases, some models may perform better than others.


================================================
FILE: docs/docs/tools/index.md
================================================
# Tools

Here is a list of PR-Agent tools, each with a dedicated page that explains how to use it:

| Tool                                                                                     | Description                                                                                                                                 |
|------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------|
| **[PR Description (`/describe`)](./describe.md)**                                        | Automatically generating PR description - title, type, summary, code walkthrough and labels                                                 |
| **[PR Review (`/review`)](./review.md)**                                                 | Adjustable feedback about the PR, possible issues, security concerns, review effort and more                                                |
| **[Code Suggestions (`/improve`)](./improve.md)**                                        | Code suggestions for improving the PR                                                                                                       |
| **[Question Answering (`/ask ...`)](./ask.md)**                                          | Answering free-text questions about the PR, or on specific code lines                                                                       |
| **[Add Documentation (`/add_docs`)](./add_docs.md)**                                     | Generate documentation for code components that are missing it                                                                              |
| **[Generate Labels (`/generate_labels`)](./generate_labels.md)**                         | Generate custom labels for the PR based on the code changes                                                                                 |
| **[Similar Issues (`/similar_issue`)](./similar_issues.md)**                             | Find similar issues in the repository based on the current issue                                                                            |
| **[Help (`/help`)](./help.md)**                                                          | Provides a list of all the available tools                                                                                                  |
| **[Help Docs (`/help_docs`)](./help_docs.md)**                                           | Answer a free-text question based on a git documentation folder                                                                             |
| **[Update Changelog (`/update_changelog`)](./update_changelog.md)**                      | Automatically updating the CHANGELOG.md file with the PR changes                                                                            |


================================================
FILE: docs/docs/tools/review.md
================================================
## Overview

The `review` tool scans the PR code changes, and generates feedback about the PR, aiming to aid the reviewing process.
<br>
The tool can be triggered automatically every time a new PR is [opened](../usage-guide/automations_and_usage.md#github-app-automatic-tools-when-a-new-pr-is-opened), or can be invoked manually by commenting on any PR:

```
/review
```

Note that the main purpose of the `review` tool is to provide the **PR reviewer** with useful feedback and insights. The PR author, in contrast, may prefer to save time and focus on the output of the [improve](./improve.md) tool, which provides actionable code suggestions.

(Read more about the different personas in the PR process and how PR-Agent aims to assist them in our [blog](https://www.codium.ai/blog/understanding-the-challenges-and-pain-points-of-the-pull-request-cycle/))

## Example usage

### Manual triggering

Invoke the tool manually by commenting `/review` on any PR:

![review comment](https://codium.ai/images/pr_agent/review_comment.png){width=512}

After ~30 seconds, the tool will generate a review for the PR:

![review](https://codium.ai/images/pr_agent/review3.png){width=512}

If you want to edit [configurations](#configuration-options), add the relevant ones to the command:

```
/review --pr_reviewer.some_config1=... --pr_reviewer.some_config2=...
```

### Automatic triggering

To run the `review` tool automatically when a PR is opened, define in a [configuration file](../usage-guide/configuration_options.md#wiki-configuration-file):

```
[github_app]
pr_commands = [
    "/review",
    ...
]

[pr_reviewer]
extra_instructions = "..."
...
```

- The `pr_commands` lists commands that will be executed automatically when a PR is opened.
- The `[pr_reviewer]` section contains the configurations for the `review` tool you want to edit (if any).

## Configuration options

???+ example "General options"

    <table>
      <tr>
        <td><b>persistent_comment</b></td>
        <td>If set to true, the review comment will be persistent, meaning that every new review request will edit the previous one. Default is true.</td>
      </tr>
      <tr>
      <td><b>final_update_message</b></td>
      <td>When set to true, updating a persistent review comment during online commenting will automatically add a short comment with a link to the updated review in the pull request. Default is true.</td>
      </tr>
      <tr>
        <td><b>extra_instructions</b></td>
        <td>Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ...".</td>
      </tr>
      <tr>
        <td><b>enable_help_text</b></td>
        <td>If set to true, the tool will display a help text in the comment. Default is false.</td>
      </tr>
      <tr>
        <td><b>num_max_findings</b></td>
        <td>Number of maximum returned findings. Default is 3.</td>
      </tr>
    </table>

???+ example "Enable/disable specific sub-sections"

    <table>
      <tr>
        <td><b>require_score_review</b></td>
        <td>If set to true, the tool will add a section that scores the PR. Default is false.</td>
      </tr>
      <tr>
        <td><b>require_tests_review</b></td>
        <td>If set to true, the tool will add a section that checks if the PR contains tests. Default is true.</td>
      </tr>
      <tr>
        <td><b>require_estimate_effort_to_review</b></td>
        <td>If set to true, the tool will add a section that estimates the effort needed to review the PR. Default is true.</td>
      </tr>
      <tr>
        <td><b>require_estimate_contribution_time_cost</b></td>
        <td>If set to true, the tool will add a section that estimates the time required for a senior developer to create and submit such changes. Default is false.</td>
      </tr>
      <tr>
        <td><b>require_can_be_split_review</b></td>
        <td>If set to true, the tool will add a section that checks if the PR contains several themes, and can be split into smaller PRs. Default is false.</td>
      </tr>
      <tr>
        <td><b>require_security_review</b></td>
        <td>If set to true, the tool will add a section that checks if the PR contains a possible security or vulnerability issue. Default is true.</td>
      </tr>
        <tr>
        <td><b>require_todo_scan</b></td>
        <td>If set to true, the tool will add a section that lists TODO comments found in the PR code changes. Default is false.
        </td>
      </tr>
      <tr>
        <td><b>require_ticket_analysis_review</b></td>
        <td>If set to true, and the PR contains a GitHub or Jira ticket link, the tool will add a section that checks if the PR in fact fulfilled the ticket requirements. Default is true.</td>
      </tr>
    </table>

???+ example "Adding PR labels"

    You can enable/disable the `review` tool to add specific labels to the PR:

    <table>
      <tr>
        <td><b>enable_review_labels_security</b></td>
        <td>If set to true, the tool will publish a 'possible security issue' label if it detects a security issue. Default is true.</td>
      </tr>
      <tr>
        <td><b>enable_review_labels_effort</b></td>
        <td>If set to true, the tool will publish a 'Review effort x/5' label (1–5 scale). Default is true.</td>
      </tr>
    </table>

## Usage Tips

### General guidelines

!!! tip ""

    The `review` tool provides a collection of configurable feedback sections about a PR.
    It is recommended to review the [Configuration options](#configuration-options) section, and choose the relevant options for your use case.

    Some of the features that are disabled by default are quite useful, and should be considered for enabling. For example:
    `require_score_review`, and more.

    On the other hand, if you find one of the enabled features to be irrelevant for your use case, disable it. No default configuration can fit all use cases.

### Automation

!!! tip ""
    When you first install the PR-Agent app, the [default mode](../usage-guide/automations_and_usage.md#github-app-automatic-tools-when-a-new-pr-is-opened) for the `review` tool is:
    ```
    pr_commands = ["/review", ...]
    ```
    Meaning the `review` tool will run automatically on every PR, without any additional configurations.
    Edit this field to enable/disable the tool, or to change the configurations used.

### Auto-generated PR labels by the Review Tool

!!! tip ""

    The `review` tool can automatically add labels to your Pull Requests:

    - **`possible security issue`**: This label is applied if the tool detects a potential [security vulnerability](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/pr_reviewer_prompts.toml#L121) in the PR's code. This feedback is controlled by the 'enable_review_labels_security' flag (default is true).
    - **`review effort [x/5]`**: This label estimates the [effort](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/pr_reviewer_prompts.toml#L105) required to review the PR on a relative scale of 1 to 5, where 'x' represents the assessed effort. This feedback is controlled by the 'enable_review_labels_effort' flag (default is true).
    - **`ticket compliance`**: Adds a label indicating code compliance level ("Fully compliant" | "PR Code Verified" | "Partially compliant" | "Not compliant") to any GitHub/Jira/Linear ticket linked in the PR. Controlled by the 'require_ticket_labels' flag (default: false). If 'require_no_ticket_labels' is also enabled, PRs without ticket links will receive a "No ticket found" label.


### Auto-blocking PRs from being merged based on the generated labels

!!! tip ""

    You can configure a CI/CD Action to prevent merging PRs with specific labels. For example, implement a dedicated [GitHub Action](https://medium.com/sequra-tech/quick-tip-block-pull-request-merge-using-labels-6cc326936221).

    This approach helps ensure PRs with potential security issues or ticket compliance problems will not be merged without further review.

    Since AI may make mistakes or lack complete context, use this feature judiciously. For flexibility, users with appropriate permissions can remove generated labels when necessary. When a label is removed, this action will be automatically documented in the PR discussion, clearly indicating it was a deliberate override by an authorized user to allow the merge.

### Extra instructions

!!! tip "" 

    Extra instructions are important.
    The `review` tool can be configured with extra instructions, which can be used to guide the model toward feedback tailored to the needs of your project.

    Be specific, clear, and concise in the instructions. With extra instructions, you are the prompter. Specify the relevant sub-tool, and the relevant aspects of the PR that you want to emphasize.

    Examples of extra instructions:
    ```
    [pr_reviewer]
    extra_instructions="""\
    In the code feedback section, emphasize the following:
    - Does the code logic cover relevant edge cases?
    - Is the code logic clear and easy to understand?
    - Is the code logic efficient?
    ...
    """
    ```
    Use triple quotes to write multi-line instructions. Use bullet points to make the instructions more readable.


================================================
FILE: docs/docs/tools/similar_issues.md
================================================
## Overview

The similar issue tool retrieves the most similar issues to the current issue.
It can be invoked manually by commenting on any PR:

```
/similar_issue
```

## Example usage

![similar_issue_original_issue](https://codium.ai/images/pr_agent/similar_issue_original_issue.png){width=768}

![similar_issue_comment](https://codium.ai/images/pr_agent/similar_issue_comment.png){width=768}

![similar_issue](https://codium.ai/images/pr_agent/similar_issue.png){width=768}

Note that to perform retrieval, the `similar_issue` tool indexes all of the repo's previous issues (once).

### Selecting a Vector Database

Configure your preferred database by setting the `vectordb` parameter in the `[pr_similar_issue]` section of the `configuration.toml` file.

#### Available Options

Choose from the following Vector Databases:

1. LanceDB
2. Pinecone
3. Qdrant

#### Pinecone Configuration

To use Pinecone with the `similar issue` tool, add these credentials to `.secrets.toml` (or set as environment variables):

```
[pinecone]
api_key = "..."
environment = "..."
```

These parameters can be obtained by registering to [Pinecone](https://app.pinecone.io/?sessionType=signup/).

#### Qdrant Configuration

To use Qdrant with the `similar issue` tool, add these credentials to `.secrets.toml` (or set as environment variables):

```
[qdrant]
url = "https://YOUR-QDRANT-URL" # e.g., https://xxxxxxxx-xxxxxxxx.eu-central-1-0.aws.cloud.qdrant.io
api_key = "..."
```

Then select Qdrant in `configuration.toml`:

```
[pr_similar_issue]
vectordb = "qdrant"
```

You can get a free managed Qdrant instance from [Qdrant Cloud](https://cloud.qdrant.io/).

## How to use

- To invoke the 'similar issue' tool from **CLI**, run:
`python3 cli.py --issue_url=... similar_issue`

- To invoke the 'similar issue' tool via online usage, [comment](https://github.com/qodo-ai/pr-agent/issues/178#issuecomment-1716934893) on a PR:
`/similar_issue`

- You can also enable the 'similar issue' tool to run automatically when a new issue is opened, by adding it to the [pr_commands list in the github_app section](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L229)


================================================
FILE: docs/docs/tools/update_changelog.md
================================================
## Overview

The `update_changelog` tool automatically updates the CHANGELOG.md file with the PR changes.
It can be invoked manually by commenting on any PR:

```
/update_changelog
```

## Example usage

![update_changelog_comment](https://codium.ai/images/pr_agent/update_changelog_comment.png){width=768}

![update_changelog](https://codium.ai/images/pr_agent/update_changelog.png){width=768}

## Configuration options

Under the section `pr_update_changelog`, the [configuration file](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L169) contains options to customize the 'update changelog' tool:

- `push_changelog_changes`: whether to push the changes to CHANGELOG.md, or just publish them as a comment. Default is false (publish as comment).
- `extra_instructions`: Optional extra instructions to the tool. For example: "Use the following structure: ..."
- `add_pr_link`: whether the model should try to add a link to the PR in the changelog. Default is true.
- `skip_ci_on_push`: whether the commit message (when `push_changelog_changes` is true) will include the term "[skip ci]", preventing CI tests from being triggered on the changelog commit. Default is true.


================================================
FILE: docs/docs/usage-guide/EXAMPLE_BEST_PRACTICE.md
================================================
## Recommended Python Best Practices

This document outlines a series of recommended best practices for Python development. These guidelines aim to improve code quality, maintainability, and readability.

### Imports

Use  `import`  statements for packages and modules only, not for individual types, classes, or functions.

#### Definition

Reusability mechanism for sharing code from one module to another.

#### Decision

- Use  `import x`  for importing packages and modules.
- Use  `from x import y`  where  `x`  is the package prefix and  `y`  is the module name with no prefix.
- Use  `from x import y as z`  in any of the following circumstances:
  - Two modules named  `y`  are to be imported.
  - `y`  conflicts with a top-level name defined in the current module.
  - `y`  conflicts with a common parameter name that is part of the public API (e.g.,  `features`).
  - `y`  is an inconveniently long name, or too generic in the context of your code
- Use  `import y as z`  only when  `z`  is a standard abbreviation (e.g.,  `import numpy as np`).

For example the module  `sound.effects.echo`  may be imported as follows:

```
from sound.effects import echo
...
echo.EchoFilter(input, output, delay=0.7, atten=4)

```

Do not use relative names in imports. Even if the module is in the same package, use the full package name. This helps prevent unintentionally importing a package twice.

##### Exemptions

Exemptions from this rule:

- Symbols from the following modules are used to support static analysis and type checking:
  - [`typing`  module](https://google.github.io/styleguide/pyguide.html#typing-imports)
  - [`collections.abc`  module](https://google.github.io/styleguide/pyguide.html#typing-imports)
  - [`typing_extensions`  module](https://github.com/python/typing_extensions/blob/main/README.md)
- Redirects from the  [six.moves module](https://six.readthedocs.io/#module-six.moves).

### Packages

Import each module using the full pathname location of the module.

#### Decision

All new code should import each module by its full package name.

Imports should be as follows:

```
Yes:
  # Reference absl.flags in code with the complete name (verbose).
  import absl.flags
  from doctor.who import jodie

  _FOO = absl.flags.DEFINE_string(...)

```

```
Yes:
  # Reference flags in code with just the module name (common).
  from absl import flags
  from doctor.who import jodie

  _FOO = flags.DEFINE_string(...)

```

_(assume this file lives in  `doctor/who/`  where  `jodie.py`  also exists)_

```
No:
  # Unclear what module the author wanted and what will be imported.  The actual
  # import behavior depends on external factors controlling sys.path.
  # Which possible jodie module did the author intend to import?
  import jodie

```

The directory the main binary is located in should not be assumed to be in  `sys.path`  despite that happening in some environments. This being the case, code should assume that  `import jodie`  refers to a third-party or top-level package named  `jodie`, not a local  `jodie.py`.

### Default Iterators and Operators

Use default iterators and operators for types that support them, like lists, dictionaries, and files.

#### Definition

Container types, like dictionaries and lists, define default iterators and membership test operators (“in” and “not in”).

#### Decision

Use default iterators and operators for types that support them, like lists, dictionaries, and files. The built-in types define iterator methods, too. Prefer these methods to methods that return lists, except that you should not mutate a container while iterating over it.

```
Yes:  for key in adict: ...
      if obj in alist: ...
      for line in afile: ...
      for k, v in adict.items(): ...
```

```
No:   for key in adict.keys(): ...
      for line in afile.readlines(): ...
```

### Lambda Functions

Okay for one-liners. Prefer generator expressions over  `map()`  or  `filter()`  with a  `lambda`.

#### Decision

Lambdas are allowed. If the code inside the lambda function spans multiple lines or is longer than 60-80 chars, it might be better to define it as a regular  [nested function](https://google.github.io/styleguide/pyguide.html#lexical-scoping).

For common operations like multiplication, use the functions from the  `operator`  module instead of lambda functions. For example, prefer  `operator.mul`  to  `lambda x, y: x * y`.

### Default Argument Values

Okay in most cases.

#### Definition

You can specify values for variables at the end of a function’s parameter list, e.g.,  `def foo(a, b=0):`. If  `foo`  is called with only one argument,  `b`  is set to 0. If it is called with two arguments,  `b`  has the value of the second argument.

#### Decision

Okay to use with the following caveat:

Do not use mutable objects as default values in the function or method definition.

```
Yes: def foo(a, b=None):
         if b is None:
             b = []
Yes: def foo(a, b: Sequence | None = None):
         if b is None:
             b = []
Yes: def foo(a, b: Sequence = ()):  # Empty tuple OK since tuples are immutable.
         ...
```

```
from absl import flags
_FOO = flags.DEFINE_string(...)

No:  def foo(a, b=[]):
         ...
No:  def foo(a, b=time.time()):  # Is `b` supposed to represent when this module was loaded?
         ...
No:  def foo(a, b=_FOO.value):  # sys.argv has not yet been parsed...
         ...
No:  def foo(a, b: Mapping = {}):  # Could still get passed to unchecked code.
         ...
```

### True/False Evaluations

Use the “implicit” false if possible, e.g.,  `if foo:`  rather than  `if foo != []:`

### Lexical Scoping

Okay to use.

An example of the use of this feature is:

```
def get_adder(summand1: float) -> Callable[[float], float]:
    """Returns a function that adds numbers to a given number."""
    def adder(summand2: float) -> float:
        return summand1 + summand2

    return adder
```

#### Decision

Okay to use.

### Threading

Do not rely on the atomicity of built-in types.

While Python’s built-in data types such as dictionaries appear to have atomic operations, there are corner cases where they aren’t atomic (e.g. if  `__hash__`  or  `__eq__`  are implemented as Python methods) and their atomicity should not be relied upon. Neither should you rely on atomic variable assignment (since this in turn depends on dictionaries).

Use the  `queue`  module’s  `Queue`  data type as the preferred way to communicate data between threads. Otherwise, use the  `threading`  module and its locking primitives. Prefer condition variables and  `threading.Condition`  instead of using lower-level locks.


================================================
FILE: docs/docs/usage-guide/additional_configurations.md
================================================
## Show possible configurations

The possible configurations of PR-Agent are stored [here](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml){:target="_blank"}.
In the [tools](../tools/index.md) page you can find explanations on how to use these configurations for each tool.

To print all the available configurations as a comment on your PR, you can use the following command:

```
/config
```

![possible_config1](https://codium.ai/images/pr_agent/possible_config1.png){width=512}

To view the **actual** configurations used for a specific tool, after all the user settings are applied, you can add for each tool a `--config.output_relevant_configurations=true` suffix.
For example:

```
/improve --config.output_relevant_configurations=true
```

Will output an additional field showing the actual configurations used for the `improve` tool.

![possible_config2](https://codium.ai/images/pr_agent/possible_config2.png){width=512}

## Ignoring files from analysis

In some cases, you may want to exclude specific files or directories from the analysis performed by PR-Agent. This can be useful, for example, when you have files that are generated automatically or files that shouldn't be reviewed, like vendor code.

You can ignore files or folders using the following methods:

- `IGNORE.GLOB`
- `IGNORE.REGEX`

which you can edit to ignore files or folders based on glob or regex patterns.

### Example usage

Let's look at an example where we want to ignore all files with `.py` extension from the analysis.

To ignore Python files in a PR with online usage, comment on a PR:
`/review --ignore.glob="['*.py']"`

To ignore Python files in all PRs using `glob` pattern, set in a configuration file:

```
[ignore]
glob = ['*.py']
```

And to ignore Python files in all PRs using `regex` pattern, set in a configuration file:

```
[ignore]
regex = ['.*\.py$']
```

## Extra instructions

All PR-Agent tools have a parameter called `extra_instructions` that lets you add free-text extra instructions. Example usage:

```
/update_changelog --pr_update_changelog.extra_instructions="Make sure to update also the version ..."
```

## Language Settings

The default response language for PR-Agent is **U.S. English**. However, some development teams may prefer to display information in a different language. For example, your team's workflow might improve if PR descriptions and code suggestions are set to your country's native language.

To configure this, set the `response_language` parameter in the configuration file. This will prompt the model to respond in the specified language. Use a **standard locale code** based on [ISO 3166](https://en.wikipedia.org/wiki/ISO_3166) (country codes) and [ISO 639](https://en.wikipedia.org/wiki/ISO_639) (language codes) to define a language-country pair. See this [comprehensive list of locale codes](https://simplelocalize.io/data/locales/).

Example:

```toml
[config]
response_language = "it-IT"
```

This will set the response language globally for all the commands to Italian.

> **Important:** Note that only dynamic text generated by the AI model is translated to the configured language. Static text such as labels and table headers that are not part of the AI model's response will remain in US English. In addition, the model you are using must have good support for the specified language.

[//]: # (## Working with large PRs)

[//]: # ()
[//]: # (The default mode of CodiumAI is to have a single call per tool, using GPT-4, which has a token limit of 8000 tokens.)

[//]: # (This mode provides a very good speed-quality-cost tradeoff, and can handle most PRs successfully.)

[//]: # (When the PR is above the token limit, it employs a [PR Compression strategy]&#40;../core-abilities/index.md&#41;.)

[//]: # ()
[//]: # (However, for very large PRs, or in case you want to emphasize quality over speed and cost, there are two possible solutions:)

[//]: # (1&#41; [Use a model]&#40;./changing_a_model.md&#41; with larger context, like GPT-32K, or claude-100K. This solution will be applicable for all the tools.)

[//]: # (2&#41; For the `/improve` tool, there is an ['extended' mode]&#40;../tools/improve.md&#41; &#40;`/improve --extended`&#41;,)

[//]: # (which divides the PR into chunks, and processes each chunk separately. With this mode, regardless of the model, no compression will be done &#40;but for large PRs, multiple model calls may occur&#41;)


## Expand GitLab submodule diffs

By default, GitLab merge requests show submodule updates as `Subproject commit` lines. To include the actual file-level changes from those submodules in PR-Agent analysis, enable:

```toml
[gitlab]
expand_submodule_diffs = true
```

When enabled, PR-Agent will fetch and attach diffs from the submodule repositories. The default is `false` to avoid extra GitLab API calls.

## Log Level

PR-Agent allows you to control the verbosity of logging by using the `log_level` configuration parameter. This is particularly useful for troubleshooting and debugging issues with your PR workflows.

```
[config]
log_level = "DEBUG"  # Options: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"
```

The default log level is "DEBUG", which provides detailed output of all operations. If you prefer less verbose logs, you can set higher log levels like "INFO" or "WARNING".

## Integrating with Logging Observability Platforms

Various logging observability tools can be used out-of-the box when using the default LiteLLM AI Handler. Simply configure the LiteLLM callback settings in `configuration.toml` and set environment variables according to the LiteLLM [documentation](https://docs.litellm.ai/docs/).

For example, to use [LangSmith](https://www.langchain.com/langsmith) you can add the following to your `configuration.toml` file:

```
[litellm]
enable_callbacks = true
success_callback = ["langsmith"]
failure_callback = ["langsmith"]
service_callback = []
```

Then set the following environment variables:

```
LANGSMITH_API_KEY=<api_key>
LANGSMITH_PROJECT=<project>
LANGSMITH_BASE_URL=<url>
```

## Bringing additional repository metadata to PR-Agent

To provide PR-Agent tools with additional context about your project, you can enable automatic repository metadata detection. 

If you set:

```toml
[config]
add_repo_metadata = true
```

PR-Agent automatically searches for repository metadata files in your PR's head branch root directory. By default, it looks for:
[AGENTS.MD](https://agents.md/), [QODO.MD](https://docs.codium.ai/qodo-documentation/qodo-command/getting-started/setup-and-quickstart), [CLAUDE.MD](https://www.anthropic.com/engineering/claude-code-best-practices).

You can also specify custom filenames to search for:

```toml
[config]
add_repo_metadata_file_list= ["file1.md", "file2.md", ...]
```

## Ignoring automatic commands in PRs

PR-Agent allows you to automatically ignore certain PRs based on various criteria:

- PRs with specific titles (using regex matching)
- PRs between specific branches (using regex matching)
- PRs from specific repositories (using regex matching)
- PRs not from specific folders
- PRs containing specific labels
- PRs opened by specific users

### Ignoring PRs with specific titles

To ignore PRs with a specific title such as "[Bump]: ...", you can add the following to your `configuration.toml` file:

```toml
[config]
ignore_pr_title = ["\\[Bump\\]"]
```

Where the `ignore_pr_title` is a list of regex patterns to match the PR title you want to ignore. Default is `ignore_pr_title = ["^\\[Auto\\]", "^Auto"]`.

### Ignoring PRs between specific branches

To ignore PRs from specific source or target branches, you can add the following to your `configuration.toml` file:

```toml
[config]
ignore_pr_source_branches = ['develop', 'main', 'master', 'stage']
ignore_pr_target_branches = ["qa"]
```

Where the `ignore_pr_source_branches` and `ignore_pr_target_branches` are lists of regex patterns to match the source and target branches you want to ignore.
They are not mutually exclusive; you can use them together or separately.

### Ignoring PRs from specific repositories

To ignore PRs from specific repositories, you can add the following to your `configuration.toml` file:

```toml
[config]
ignore_repositories = ["my-org/my-repo1", "my-org/my-repo2"]
```

Where the `ignore_repositories` is a list of regex patterns to match the repositories you want to ignore. This is useful when you have multiple repositories and want to exclude certain ones from analysis.


### Ignoring PRs not from specific folders

To allow only specific folders (often needed in large monorepos), set:

```
[config]
allow_only_specific_folders=['folder1','folder2']
```

For the configuration above, automatic feedback will only be triggered when the PR changes include files where 'folder1' or 'folder2' is in the file path.

### Ignoring PRs containing specific labels

To ignore PRs containing specific labels, you can add the following to your `configuration.toml` file:

```
[config]
ignore_pr_labels = ["do-not-merge"]
```

Where the `ignore_pr_labels` is a list of labels that when present in the PR, the PR will be ignored.

### Ignoring PRs from specific users

PR-Agent tries to automatically identify and ignore pull requests created by bots using:

- GitHub's native bot detection system
- Name-based pattern matching

While this detection is robust, it may not catch all cases, particularly when:

- Bots are registered as regular user accounts
- Bot names don't match common patterns

To supplement the automatic bot detection, you can manually specify users to ignore. Add the following to your `configuration.toml` file to ignore PRs from specific users:

```
[config]
ignore_pr_authors = ["my-special-bot-user", ...]
```

Where the `ignore_pr_authors` is a regex list of usernames that you want to ignore.

!!! note
    There is one specific case where bots will receive an automatic response - when they generated a PR with a _failed test_.

### Ignoring Generated Files by Language/Framework

To automatically exclude files generated by specific languages or frameworks, you can add the following to your `configuration.toml` file:

```
[config]
ignore_language_framework = ['protobuf', ...]
```

You can view the list of auto-generated file patterns in [`generated_code_ignore.toml`](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/generated_code_ignore.toml).
Files matching these glob patterns will be automatically excluded from PR Agent analysis.

### Ignoring Tickets with Specific Labels

When PR-Agent analyzes tickets (JIRA, GitHub Issues, GitLab Issues, etc.) referenced in your PR, you may want to exclude tickets that have certain labels from the analysis. This is useful for filtering out tickets marked as "ignore-compliance", "skip-review", or other labels that indicate the ticket should not be considered during PR review.

To ignore tickets with specific labels, add the following to your `configuration.toml` file:

```toml
[config]
ignore_ticket_labels = ["ignore-compliance", "skip-review", "wont-fix"]
```

Where `ignore_ticket_labels` is a list of label names that should be ignored during ticket analysis.


================================================
FILE: docs/docs/usage-guide/automations_and_usage.md
================================================
## Local repo (CLI)

When running from your locally cloned PR-Agent repo (CLI), your local configuration file will be used.
Examples of invoking the different tools via the CLI:

- **Review**:       `python -m pr_agent.cli --pr_url=<pr_url>  review`
- **Describe**:     `python -m pr_agent.cli --pr_url=<pr_url>  describe`
- **Improve**:      `python -m pr_agent.cli --pr_url=<pr_url>  improve`
- **Ask**:          `python -m pr_agent.cli --pr_url=<pr_url>  ask "Write me a poem about this PR"`
- **Update Changelog**:      `python -m pr_agent.cli --pr_url=<pr_url>  update_changelog`

`<pr_url>` is the url of the relevant PR (for example: [#50](https://github.com/qodo-ai/pr-agent/pull/50)).

**Notes:**

1. In addition to editing your local configuration file, you can also change any configuration value by adding it to the command line:

```
python -m pr_agent.cli --pr_url=<pr_url>  /review --pr_reviewer.extra_instructions="focus on the file: ..."
```

2. You can print results locally, without publishing them, by setting in `configuration.toml`:

```
[config]
publish_output=false
verbosity_level=2
```

This is useful for debugging or experimenting with different tools.

3. **git provider**: The [git_provider](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L12) field in a configuration file determines the GIT provider that will be used by PR-Agent. Currently, the following providers are supported:
`github` **(default)**, `gitlab`, `bitbucket`, `azure`, `codecommit`, `local`, and `gitea`.

### CLI Health Check

To verify that PR-Agent has been configured correctly, you can run this health check command from the repository root:

```bash
python -m tests.health_test.main
```

If the health check passes, you will see the following output at the end of the run:

```
========
Health test passed successfully
========
```

Before running the health check, ensure you have:

- Configured your [LLM provider](./changing_a_model.md)
- Added a valid GitHub token to your configuration file

## Online usage

Online usage means invoking PR-Agent tools by [comments](https://github.com/qodo-ai/pr-agent/pull/229#issuecomment-1695021901) on a PR.
Commands for invoking the different tools via comments:

- **Review**:       `/review`
- **Describe**:     `/describe`
- **Improve**:      `/improve`  (or `/improve_code` for bitbucket, since `/improve` is sometimes reserved)
- **Ask**:          `/ask "..."`
- **Update Changelog**:      `/update_changelog`

To edit a specific configuration value, just add `--config_path=<value>` to any command.
For example, if you want to edit the `review` tool configurations, you can run:

```
/review --pr_reviewer.extra_instructions="..." --pr_reviewer.require_score_review=false
```

Any configuration value in the [configuration file](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml) can be similarly edited. Comment `/config` to see the list of available configurations.

## PR-Agent Automatic Feedback

### Disabling all automatic feedback

To easily disable all automatic feedback from PR-Agent (GitHub App, GitLab Webhook, BitBucket App, Azure DevOps Webhook), set in a configuration file:

```toml
[config]
disable_auto_feedback = true
```

When this parameter is set to `true`, PR-Agent will not run any automatic tools (like `describe`, `review`, `improve`) when a new PR is opened, or when new code is pushed to an open PR.

### GitHub App

!!! note "Configurations for PR-Agent"
    PR-Agent for GitHub is an App, hosted by Codium. So all the instructions below are relevant for PR-Agent users.
    Same goes for [GitLab webhook](#gitlab-webhook) and [BitBucket App](#bitbucket-app) sections.

#### GitHub app automatic tools when a new PR is opened

The [github_app](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L223) section defines GitHub app specific configurations.

The configuration parameter `pr_commands` defines the list of tools that will be **run automatically** when a new PR is opened:

```toml
[github_app]
pr_commands = [
    "/describe",
    "/review",
    "/improve",
]
```

This means that when a new PR is opened/reopened or marked as ready for review, PR-Agent will run the `describe`, `review` and `improve` tools.  

**Draft PRs:** 

By default, draft PRs are not considered for automatic tools, but you can change this by setting the `feedback_on_draft_pr` parameter to `true` in the configuration file.

```toml
[github_app]
feedback_on_draft_pr = true
```

**Changing default tool parameters:**

You can override the default tool parameters by using one of the three options for a [configuration file](./configuration_options.md): **wiki**, **local**, or **global**.
For example, if your configuration file contains:

```toml
[pr_description]
generate_ai_title = true
```

Every time you run the `describe` tool (including automatic runs) the PR title will be generated by the AI.


**Parameters for automated runs:**

You can customize configurations specifically for automated runs by using the `--config_path=<value>` parameter.
For instance, to modify the `review` tool settings only for newly opened PRs, use:

```toml
[github_app]
pr_commands = [
    "/describe",
    "/review --pr_reviewer.extra_instructions='focus on the file: ...'",
    "/improve",
]
```

#### GitHub app automatic tools for push actions (commits to an open PR)

In addition to running automatic tools when a PR is opened, the GitHub app can also respond to new code that is pushed to an open PR.

The configuration toggle `handle_push_trigger` can be used to enable this feature.
The configuration parameter `push_commands` defines the list of tools that will be **run automatically** when new code is pushed to the PR.

```toml
[github_app]
handle_push_trigger = true
push_commands = [
    "/describe",
    "/review",
]
```

This means that when new code is pushed to the PR, PR-Agent will run the `describe` and `review` tools, with the specified parameters.

### GitHub Action

`GitHub Action` is a different way to trigger PR-Agent tools, and uses a different configuration mechanism than `GitHub App`.<br>
You can configure settings for `GitHub Action` by adding environment variables under the env section in `.github/workflows/pr_agent.yml` file.
Specifically, start by setting the following environment variables:

```yaml
      env:
        OPENAI_KEY: ${{ secrets.OPENAI_KEY }} # Make sure to add your OpenAI key to your repo secrets
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Make sure to add your GitHub token to your repo secrets
        github_action_config.auto_review: "true" # enable\disable auto review
        github_action_config.auto_describe: "true" # enable\disable auto describe
        github_action_config.auto_improve: "true" # enable\disable auto improve
        github_action_config.pr_actions: '["opened", "reopened", "ready_for_review", "review_requested"]'
```

`github_action_config.auto_review`, `github_action_config.auto_describe` and `github_action_config.auto_improve` are used to enable/disable automatic tools that run when a new PR is opened.
If not set, the default configuration is for all three tools to run automatically when a new PR is opened.

`github_action_config.pr_actions` is used to configure which `pull_requests` events will trigger the enabled auto flags
If not set, the default configuration is `["opened", "reopened", "ready_for_review", "review_requested"]`

`github_action_config.enable_output` is used to enable/disable the GitHub Actions [output parameter](https://docs.github.com/en/actions/creating-actions/metadata-syntax-for-github-actions#outputs-for-docker-container-and-javascript-actions) (default is `true`).
Review result is output as JSON to `steps.{step-id}.outputs.review` property.
The JSON structure is equivalent to the yaml data structure defined in [pr_reviewer_prompts.toml](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/pr_reviewer_prompts.toml).

Note that you can give additional config parameters by adding environment variables to `.github/workflows/pr_agent.yml`, or by using a `.pr_agent.toml` [configuration file](./configuration_options.md#global-configuration-file) in the root of your repo

For example, you can set an environment variable: `pr_description.publish_labels=false`, or add a `.pr_agent.toml` file with the following content:

```toml
[pr_description]
publish_labels = false
```

to prevent PR-Agent from publishing labels when running the `describe` tool.

#### Enable using commands in PR

You can configure your GitHub Actions workflow to trigger on `issue_comment` [events](https://docs.github.com/en/actions/reference/workflows-and-actions/events-that-trigger-workflows#issue_comment) (`created` and `edited`).

Example GitHub Actions workflow configuration:

```yaml
on:
  issue_comment:
    types: [created, edited]
```

When this is configured, PR-Agent can be invoked by commenting on the PR.

#### Quick Reference: Model Configuration in GitHub Actions

For detailed step-by-step examples of configuring different models (Gemini, Claude, Azure OpenAI, etc.) in GitHub Actions, see the [Configuration Examples](../installation/github.md#configuration-examples) section in the installation guide.

**Common Model Configuration Patterns:**

- **OpenAI**: Set `config.model: "gpt-4o"` and `OPENAI_KEY`
- **Gemini**: Set `config.model: "gemini/gemini-1.5-flash"` and `GOOGLE_AI_STUDIO.GEMINI_API_KEY` (no `OPENAI_KEY` needed)
- **Claude**: Set `config.model: "anthropic/claude-3-opus-20240229"` and `ANTHROPIC.KEY` (no `OPENAI_KEY` needed)
- **Azure OpenAI**: Set `OPENAI.API_TYPE: "azure"`, `OPENAI.API_BASE`, and `OPENAI.DEPLOYMENT_ID`
- **Local Models**: Set `config.model: "ollama/model-name"` and `OLLAMA.API_BASE`

**Environment Variable Format:**
- Use dots (`.`) to separate sections and keys: `config.model`, `pr_reviewer.extra_instructions`
- Boolean values as strings: `"true"` or `"false"`
- Arrays as JSON strings: `'["item1", "item2"]'`

For complete model configuration details, see [Changing a model in PR-Agent](changing_a_model.md).

### GitLab Webhook

After setting up a GitLab webhook, to control which commands will run automatically when a new MR is opened, you can set the `pr_commands` parameter in the configuration file, similar to the GitHub App:

```toml
[gitlab]
pr_commands = [
    "/describe",
    "/review",
    "/improve",
]
```

The GitLab webhook can also respond to new code that is pushed to an open MR.
The configuration toggle `handle_push_trigger` can be used to enable this feature.
The configuration parameter `push_commands` defines the list of tools that will be **run automatically** when new code is pushed to the MR.

```toml
[gitlab]
handle_push_trigger = true
push_commands = [
    "/describe",
    "/review",
]
```

Note that to use the `handle_push_trigger` feature, you also need to give the GitLab webhook the "Push events" scope.

### BitBucket App

Similar to GitHub app, when running PR-Agent from BitBucket App, the default [configuration file](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml) will be initially loaded.

By uploading a local `.pr_agent.toml` file to the root of the repo's default branch, you can edit and customize any configuration parameter. Note that you need to upload `.pr_agent.toml` prior to creating a PR, in order for the configuration to take effect.

For example, if your local `.pr_agent.toml` file contains:

```toml
[pr_reviewer]
extra_instructions = "Answer in japanese"
```

Each time you invoke a `/review` tool, it will use the extra instructions you set in the local configuration file.

Note that among other limitations, BitBucket provides relatively low rate-limits for applications (up to 1000 requests per hour), and does not provide an API to track the actual rate-limit usage.
If you experience a lack of responses from PR-Agent, you might want to set: `bitbucket_app.avoid_full_files=true` in your configuration file.
This will prevent PR-Agent from acquiring the full file content, and will only use the diff content. This will reduce the number of requests made to BitBucket, at the cost of a small decrease in accuracy, as dynamic context will not be applicable.

#### BitBucket Self-Hosted App automatic tools

To control which commands will run automatically when a new PR is opened, you can set the `pr_commands` parameter in the configuration file.
Specifically, set the following values:

```toml
[bitbucket_app]
pr_commands = [
    "/review",
    "/improve --pr_code_suggestions.commitable_code_suggestions=true --pr_code_suggestions.suggestions_score_threshold=7",
]
```

Note that specifically for BitBucket, we recommend using `--pr_code_suggestions.suggestions_score_threshold=7`, which is also the default value we set for BitBucket.
Since this platform only supports inline code suggestions, we want to limit the number of suggestions, and only present a limited number.

To enable BitBucket app to respond to each **push** to the PR, set (for example):

```toml
[bitbucket_app]
handle_push_trigger = true
push_commands = [
    "/describe",
    "/review",
]
```

### Azure DevOps provider

To use Azure DevOps provider use the following settings in configuration.toml:

```toml
[config]
git_provider="azure"
```

Azure DevOps provider supports [PAT token](https://learn.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate?view=azure-devops&tabs=Windows) or [DefaultAzureCredential](https://learn.microsoft.com/en-us/azure/developer/python/sdk/authentication-overview#authentication-in-server-environments) authentication.
PAT is faster to create, but has a built-in expiration date, and will use the user identity for API calls.
Using DefaultAzureCredential, you can use a managed identity or a service principal, which are more secure and will create a separate ADO user identity (via AAD) for the agent.

If PAT was chosen, you can assign the value in .secrets.toml.
If DefaultAzureCredential was chosen, you can assign the additional env vars like AZURE_CLIENT_SECRET directly,
or use managed identity/az cli (for local development) without any additional configuration.
In any case, the 'org' value must be assigned in .secrets.toml:

```
[azure_devops]
org = "https://dev.azure.com/YOUR_ORGANIZATION/"
# pat = "YOUR_PAT_TOKEN" needed only if using PAT for authentication
```

#### Azure DevOps Webhook

To control which commands will run automatically when a new PR is opened, you can set the `pr_commands` parameter in the configuration file, similar to the GitHub App:

```toml
[azure_devops_server]
pr_commands = [
    "/describe",
    "/review",
    "/improve",
]
```

### Gitea Webhook

After setting up a Gitea webhook, to control which commands will run automatically when a new MR is opened, you can set the `pr_commands` parameter in the configuration file, similar to the GitHub App:

```toml
[gitea]
pr_commands = [
    "/describe",
    "/review",
    "/improve",
]
```


================================================
FILE: docs/docs/usage-guide/changing_a_model.md
================================================
## Changing a model in PR-Agent

See [here](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/algo/__init__.py) for a list of supported models in PR-Agent.
The default model of PR-Agent is `GPT-5` from OpenAI.
To use a different model than the default, you need to edit in the [configuration file](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L7) the fields:

```toml
[config]
model = "..."
fallback_models = ["..."]
```

For models and environments not from OpenAI, you might need to provide additional keys and other parameters.
You can give parameters via a configuration file, or from environment variables.

!!! note "Model-specific environment variables"
    See [litellm documentation](https://litellm.vercel.app/docs/proxy/quick_start#supported-llms) for the environment variables needed per model, as they may vary and change over time. Our documentation per-model may not always be up-to-date with the latest changes.
    Failing to set the needed keys of a specific model will usually result in litellm not identifying the model type, and failing to utilize it.

### OpenAI like API
To use an OpenAI like API, set the following in your `.secrets.toml` file:

```toml
[openai]
api_base = "https://api.openai.com/v1"
key = "sk-..."
```

or use the environment variables (make sure to use double underscores `__`):

```bash
OPENAI__API_BASE=https://api.openai.com/v1
OPENAI__KEY=sk-...
```

### OpenAI Flex Processing

To reduce costs for non-urgent/background tasks, enable Flex Processing:

```toml
[litellm]
extra_body='{"processing_mode": "flex"}'
```

See [OpenAI Flex Processing docs](https://platform.openai.com/docs/guides/flex-processing) for details.

### Azure

To use Azure, set in your `.secrets.toml` (working from CLI), or in the GitHub `Settings > Secrets and variables` (working from GitHub App or GitHub Action):

```toml
[openai]
key = "" # your azure api key
api_type = "azure"
api_version = '2023-05-15'  # Check Azure documentation for the current API version
api_base = ""  # The base URL for your Azure OpenAI resource. e.g. "https://<your resource name>.openai.azure.com"
deployment_id = ""  # The deployment name you chose when you deployed the engine
```

and set in your configuration file:

```toml
[config]
model="" # the OpenAI model you've deployed on Azure (e.g. gpt-4o)
fallback_models=["..."]
```

To use Azure AD (Entra id) based authentication set in your `.secrets.toml` (working from CLI), or in the GitHub `Settings > Secrets and variables` (working from GitHub App or GitHub Action):

```toml
[azure_ad]
client_id = ""  # Your Azure AD application client ID
client_secret = ""  # Your Azure AD application client secret
tenant_id = ""  # Your Azure AD tenant ID
api_base = ""  # Your Azure OpenAI service base URL (e.g., https://openai.xyz.com/)
```

Passing custom headers to the underlying LLM model API can be done by setting the `extra_headers` parameter for litellm.

```toml
[litellm]
extra_headers='{"projectId": "<authorized projectId >", ...}' # The value of this setting should be a JSON string representing the desired headers; a ValueError is thrown otherwise.
```

This enables users to pass authorization tokens or API keys, when routing requests through an API management gateway.

### Ollama

You can run models locally through either [VLLM](https://docs.litellm.ai/docs/providers/vllm) or [Ollama](https://docs.litellm.ai/docs/providers/ollama)

E.g. to use a new model locally via Ollama, set in `.secrets.toml` or in a configuration file:

```toml
[config]
model = "ollama/qwen2.5-coder:32b"
fallback_models=["ollama/qwen2.5-coder:32b"]
custom_model_max_tokens=128000 # set the maximal input tokens for the model
duplicate_examples=true # will duplicate the examples in the prompt, to help the model to generate structured output

[ollama]
api_base = "http://localhost:11434" # or whatever port you're running Ollama on
```

By default, Ollama uses a context window size of 2048 tokens. In most cases this is not enough to cover the pr-agent prompt and the pull-request diff. The context window size can be overridden with the `OLLAMA_CONTEXT_LENGTH` environment variable. For example, to set the default context length to 8K, use: `OLLAMA_CONTEXT_LENGTH=8192 ollama serve`. You can find more information in the [official Ollama FAQ](https://github.com/ollama/ollama/blob/main/docs/faq.md#how-can-i-specify-the-context-window-size).

Please note that the `custom_model_max_tokens` setting should be configured in accordance with the `OLLAMA_CONTEXT_LENGTH`. Failure to do so may result in unexpected model output.

!!! note "Local models vs commercial models"
    PR-Agent is compatible with almost any AI model, but analyzing complex code repositories and pull requests requires a model specifically optimized for code analysis.

    Commercial models such as GPT-5, Claude Sonnet, and Gemini have demonstrated robust capabilities in generating structured output for code analysis tasks with large input. In contrast, most open-source models currently available (as of January 2025) face challenges with these complex tasks.

    Based on our testing, local open-source models are suitable for experimentation and learning purposes (mainly for the `ask` command), but they are not suitable for production-level code analysis tasks.
    
    Hence, for production workflows and real-world usage, we recommend using commercial models.

### Hugging Face

To use a new model with Hugging Face Inference Endpoints, for example, set:

```toml
[config] # in configuration.toml
model = "huggingface/meta-llama/Llama-2-7b-chat-hf"
fallback_models=["huggingface/meta-llama/Llama-2-7b-chat-hf"]
custom_model_max_tokens=... # set the maximal input tokens for the model

[huggingface] # in .secrets.toml
key = ... # your Hugging Face api key
api_base = ... # the base url for your Hugging Face inference endpoint
```

(you can obtain a Hugging Face API key from [here](https://huggingface.co/settings/tokens))

### Replicate

To use Llama2 model with Replicate, for example, set:

```toml
[config] # in configuration.toml
model = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"
fallback_models=["replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"]
[replicate] # in .secrets.toml
key = ...
```

(you can obtain a Llama2 key from [here](https://replicate.com/replicate/llama-2-70b-chat/api))

Also, review the [.secrets_template.toml](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/.secrets_template.toml) file for instructions on how to set keys for other models.

### Groq

To use Llama3 model with Groq, for example, set:

```toml
[config] # in configuration.toml
model = "groq/llama3-70b-8192"
fallback_models = ["groq/llama3-70b-8192"]
[groq] # in .secrets.toml
key = ... # your Groq api key
```

(you can obtain a Groq key from [here](https://console.groq.com/keys))

### xAI

To use xAI's models with PR-Agent, set:

```toml
[config] # in configuration.toml
model = "xai/grok-2-latest"
fallback_models = ["xai/grok-2-latest"] # or any other model as fallback

[xai] # in .secrets.toml
key = "..." # your xAI API key
```

You can obtain an xAI API key from [xAI's console](https://console.x.ai/) by creating an account and navigating to the developer settings page.

### Vertex AI

To use Google's Vertex AI platform and its associated models (chat-bison/codechat-bison) set:

```toml
[config] # in configuration.toml
model = "vertex_ai/codechat-bison"
fallback_models=["vertex_ai/codechat-bison"]

[vertexai] # in .secrets.toml
vertex_project = "my-google-cloud-project"
vertex_location = ""
```

Your [application default credentials](https://cloud.google.com/docs/authentication/application-default-credentials) will be used for authentication so there is no need to set explicit credentials in most environments.

If you do want to set explicit credentials, then you can use the `GOOGLE_APPLICATION_CREDENTIALS` environment variable set to a path to a json credentials file.

### Google AI Studio

To use [Google AI Studio](https://aistudio.google.com/) models, set the relevant models in the configuration section of the configuration file:

```toml
[config] # in configuration.toml
model="gemini/gemini-1.5-flash"
fallback_models=["gemini/gemini-1.5-flash"]

[google_ai_studio] # in .secrets.toml
gemini_api_key = "..."
```

If you don't want to set the API key in the .secrets.toml file, you can set the `GOOGLE_AI_STUDIO.GEMINI_API_KEY` environment variable.

### Anthropic

To use Anthropic models, set the relevant models in the configuration section of the configuration file:

```toml
[config]
model="anthropic/claude-3-opus-20240229"
fallback_models=["anthropic/claude-3-opus-20240229"]
```

And also set the api key in the .secrets.toml file:

```toml
[anthropic]
KEY = "..."
```

See [litellm](https://docs.litellm.ai/docs/providers/anthropic#usage) documentation for more information about the environment variables required for Anthropic.

### Amazon Bedrock

To use Amazon Bedrock and its foundational models, add the below configuration:

```toml
[config] # in configuration.toml
model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"
fallback_models=["bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"]

[aws]
AWS_ACCESS_KEY_ID="..."
AWS_SECRET_ACCESS_KEY="..."
AWS_REGION_NAME="..."
```

You can also use the new Meta Llama 4 models available on Amazon Bedrock:

```toml
[config] # in configuration.toml
model="bedrock/us.meta.llama4-scout-17b-instruct-v1:0"
fallback_models=["bedrock/us.meta.llama4-maverick-17b-instruct-v1:0"]
```

#### Custom Inference Profiles

To use a custom inference profile with Amazon Bedrock (for cost allocation tags and other configuration settings), add the `model_id` parameter to your configuration:

```toml
[config] # in configuration.toml
model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"
fallback_models=["bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"]

[aws]
AWS_ACCESS_KEY_ID="..."
AWS_SECRET_ACCESS_KEY="..."
AWS_REGION_NAME="..."

[litellm]
model_id = "your-custom-inference-profile-id"
```

The `model_id` parameter will be passed to all Bedrock completion calls, allowing you to use custom inference profiles for better cost allocation and reporting.

See [litellm](https://docs.litellm.ai/docs/providers/bedrock#usage) documentation for more information about the environment variables required for Amazon Bedrock.

### DeepSeek

To use deepseek-chat model with DeepSeek, for example, set:

```toml
[config] # in configuration.toml
model = "deepseek/deepseek-chat"
fallback_models=["deepseek/deepseek-chat"]
```

and fill in your key:

```toml
[deepseek] # in .secrets.toml
key = ...
```

(you can obtain a deepseek-chat key from [here](https://platform.deepseek.com))

### DeepInfra

To use DeepSeek model with DeepInfra, for example, set:

```toml
[config] # in configuration.toml
model = "deepinfra/deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
fallback_models = ["deepinfra/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"]
[deepinfra] # in .secrets.toml
key = ... # your DeepInfra api key
```

(you can obtain a DeepInfra key from [here](https://deepinfra.com/dash/api_keys))

### Mistral

To use models like Mistral or Codestral with Mistral, for example, set:

```toml
[config] # in configuration.toml
model = "mistral/mistral-small-latest"
fallback_models = ["mistral/mistral-medium-latest"]
[mistral] # in .secrets.toml
key = "..." # your Mistral api key
```

(you can obtain a Mistral key from [here](https://console.mistral.ai/api-keys))

### Codestral

To use Codestral model with Codestral, for example, set:

```toml
[config] # in configuration.toml
model = "codestral/codestral-latest"
fallback_models = ["codestral/codestral-2405"]
[codestral] # in .secrets.toml
key = "..." # your Codestral api key
```

(you can obtain a Codestral key from [here](https://console.mistral.ai/codestral))

### Openrouter

To use a model from Openrouter, for example, set:

```toml
[config] # in configuration.toml 
model="openrouter/anthropic/claude-3.7-sonnet"
fallback_models=["openrouter/deepseek/deepseek-chat"]
custom_model_max_tokens=20000

[openrouter]  # in .secrets.toml or passed an environment variable openrouter__key
key = "..." # your openrouter api key
```

(you can obtain an Openrouter API key from [here](https://openrouter.ai/settings/keys))

### Custom models

If the relevant model doesn't appear [here](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/algo/__init__.py), you can still use it as a custom model:

1. Set the model name in the configuration file:

```toml
[config]
model="custom_model_name"
fallback_models=["custom_model_name"]
```

2. Set the maximal tokens for the model:

```toml
[config]
custom_model_max_tokens= ...
```

3. Go to [litellm documentation](https://litellm.vercel.app/docs/proxy/quick_start#supported-llms), find the model you want to use, and set the relevant environment variables.

4. Most reasoning models do not support chat-style inputs (`system` and `user` messages) or temperature settings.
To bypass chat templates and temperature controls, set `config.custom_reasoning_model = true` in your configuration file.

## Dedicated parameters

### OpenAI models

```toml
[config]
reasoning_effort = "medium" # "low", "medium", "high"
```

With OpenAI models that support reasoning effort (e.g., o4-mini), you can specify the reasoning effort via the `config` section. The default value is `medium`. You can change it to `high` or `low` based on your usage.

### Anthropic models

```toml
[config]
enable_claude_extended_thinking = false # Set to true to enable extended thinking feature
extended_thinking_budget_tokens = 2048
extended_thinking_max_output_tokens = 4096
```


================================================
FILE: docs/docs/usage-guide/configuration_options.md
================================================
The different tools and sub-tools used by PR-Agent are adjustable via a Git configuration file.
There are three main ways to set persistent configurations:

1. [Wiki](./configuration_options.md#wiki-configuration-file) configuration page
2. [Local](./configuration_options.md#local-configuration-file) configuration file
3. [Global](./configuration_options.md#global-configuration-file) configuration file

In terms of precedence, wiki configurations will override local configurations, and local configurations will override global configurations.


For a list of all possible configurations, see the [configuration options](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml) page.
In addition to general configuration options, each tool has its own configurations. For example, the `review` tool will use parameters from the [pr_reviewer](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L66) section in the configuration file.

!!! tip "Tip1: Edit only what you need"
    Your configuration file should be minimal, and edit only the relevant values. Don't copy the entire configuration options, since it can lead to legacy problems when something changes.
!!! tip "Tip2: Show relevant configurations"
    If you set `config.output_relevant_configurations` to True, each tool will also output in a collapsible section its relevant configurations. This can be useful for debugging, or getting to know the configurations better.


## Wiki configuration file

`Platforms supported: GitHub, GitLab, Bitbucket`

With PR-Agent, you can set configurations by creating a page called `.pr_agent.toml` in the [wiki](https://github.com/qodo-ai/pr-agent/wiki/pr_agent.toml) of the repo.
The advantage of this method is that it lets you set configurations without needing to commit new content to the repo - just edit the wiki page and **save**.

![wiki_configuration](https://codium.ai/images/pr_agent/wiki_configuration.png){width=512}

Click [here](https://codium.ai/images/pr_agent/wiki_configuration_pr_agent.mp4) to see a short instructional video. We recommend surrounding the configuration content with triple-quotes (or \`\`\`toml), to allow better presentation when displayed in the wiki as markdown.
An example content:

```toml
[pr_description]
generate_ai_title=true
```

PR-Agent will know to remove the surrounding quotes when reading the configuration content.

## Local configuration file

`Platforms supported: GitHub, GitLab, Bitbucket, Azure DevOps`

By uploading a local `.pr_agent.toml` file to the root of the repo's default branch, you can edit and customize any configuration parameter. Note that you need to upload or update `.pr_agent.toml` before using the PR Agent tools (either at PR creation or via manual trigger) for the configuration to take effect.

For example, if you set in `.pr_agent.toml`:

```
[pr_reviewer]
extra_instructions="""\
- instruction a
- instruction b
...
"""
```

Then you can give a list of extra instructions to the `review` tool.

## Global configuration file

`Platforms supported: GitHub, GitLab (cloud), Bitbucket (cloud)`

If you create a repo called `pr-agent-settings` in your **organization**, its configuration file `.pr_agent.toml` will be used as a global configuration file for any other repo that belongs to the same organization.
Parameters from a local `.pr_agent.toml` file, in a specific repo, will override the global configuration parameters.

For example, in the GitHub organization `qodo-ai`:

- The file [`https://github.com/qodo-ai/pr-agent-settings/.pr_agent.toml`](https://github.com/qodo-ai/pr-agent-settings/blob/main/.pr_agent.toml)  serves as a global configuration file for all the repos in the GitHub organization `qodo-ai`.

- The repo [`https://github.com/qodo-ai/pr-agent`](https://github.com/qodo-ai/pr-agent/blob/main/.pr_agent.toml) inherits the global configuration file from `pr-agent-settings`.

## Project/Group level configuration file

`Platforms supported: GitLab, Bitbucket Data Center`

Create a repository named `pr-agent-settings` within a specific project (Bitbucket) or a group/subgroup (Gitlab). 
The configuration file in this repository will apply to all repositories directly under the same project/group/subgroup.

!!! note "Note"
    For Gitlab, in case of a repository nested in several sub groups, the lookup for a pr-agent-settings repo will be only on one level above such repository.


## Organization level configuration file

`Relevant platforms: Bitbucket Data Center`

Create a dedicated project to hold a global configuration file that affects all repositories across all projects in your organization.

**Setting up organization-level global configuration:**

1. Create a new project with both the name and key: PR_AGENT_SETTINGS.
2. Inside the PR_AGENT_SETTINGS project, create a repository named pr-agent-settings.
3. In this repository, add a `.pr_agent.toml` configuration file—structured similarly to the global configuration file described above.
4. Optionally, you can add organizational-level [global best practices](../tools/improve.md#global-hierarchical-best-practices).

Repositories across your entire Bitbucket organization will inherit the configuration from this file.

!!! note "Note"
    If both organization-level and project-level global settings are defined, the project-level settings will take precedence over the organization-level configuration. Additionally, parameters from a repository’s local .pr_agent.toml file will always override both global settings.


================================================
FILE: docs/docs/usage-guide/index.md
================================================
# Usage guide

This section provides a detailed guide on how to use PR-Agent.
It includes information on how to adjust PR-Agent configurations, define which tools will run automatically, and other advanced configurations.

- [Introduction](./introduction.md)
- [Configuration File](./configuration_options.md)
- [Usage and Automation](./automations_and_usage.md)
    - [Local Repo (CLI)](./automations_and_usage.md#local-repo-cli)
    - [Online Usage](./automations_and_usage.md#online-usage)
    - [GitHub App](./automations_and_usage.md#github-app)
    - [GitHub Action](./automations_and_usage.md#github-action)
    - [GitLab Webhook](./automations_and_usage.md#gitlab-webhook)
    - [Gitea Webhook](./automations_and_usage.md#gitea-webhook)
    - [BitBucket App](./automations_and_usage.md#bitbucket-app)
    - [Azure DevOps Provider](./automations_and_usage.md#azure-devops-provider)
- [Managing Mail Notifications](./mail_notifications.md)
- [Changing a Model](./changing_a_model.md)
- [Additional Configurations](./additional_configurations.md)
    - [Ignoring files from analysis](./additional_configurations.md#ignoring-files-from-analysis)
    - [Extra instructions](./additional_configurations.md#extra-instructions)
    - [Working with large PRs](./additional_configurations.md#working-with-large-prs)
    - [Changing a model](./changing_a_model.md)
- [FAQ](../faq/index.md)


================================================
FILE: docs/docs/usage-guide/introduction.md
================================================
After [installation](../installation/index.md), there are three basic ways to invoke PR-Agent:

1. Locally running a CLI command
2. Online usage - by [commenting](https://github.com/qodo-ai/pr-agent/pull/229#issuecomment-1695021901){:target="_blank"} on a PR
3. Enabling PR-Agent tools to run automatically when a new PR is opened

Specifically, CLI commands can be issued by invoking a pre-built [docker image](../installation/locally.md#using-docker-image), or by invoking a [locally cloned repo](../installation/locally.md#run-from-source).

For online usage, you will need to set up either a [GitHub App](../installation/github.md#run-as-a-github-app) or a [GitHub Action](../installation/github.md#run-as-a-github-action) (GitHub), a [GitLab webhook](../installation/gitlab.md#run-a-gitlab-webhook-server) (GitLab), or a [BitBucket App](../installation/bitbucket.md#run-using-codiumai-hosted-bitbucket-app) (BitBucket).
These platforms also allow PR-Agent tools to run automatically when a new PR is opened, or on each push to a branch.


================================================
FILE: docs/docs/usage-guide/mail_notifications.md
================================================

Unfortunately, it is not possible in GitHub to disable mail notifications from a specific user.
If you are subscribed to notifications for a repo with PR-Agent, we recommend turning off notifications for PR comments, to avoid lengthy emails:

![notifications](https://codium.ai/images/pr_agent/notifications.png){width=512}

As an alternative, you can filter in your mail provider the notifications specifically from the PR-Agent bot, [see how](https://www.quora.com/How-can-you-filter-emails-for-specific-people-in-Gmail#:~:text=On%20the%20Filters%20and%20Blocked,the%20body%20of%20the%20email).

![filter_mail_notifications](https://codium.ai/images/pr_agent/filter_mail_notifications.png){width=512}

Another option to reduce the mail overload, yet still receive notifications on PR-Agent tools, is to disable the help collapsible section in PR-Agent bot comments.
This can be done by setting `enable_help_text=false` for the relevant tool in the configuration file.
For example, to disable the help text for the `pr_reviewer` tool, set:

```
[pr_reviewer]
enable_help_text = false
```


================================================
FILE: docs/mkdocs.yml
================================================
site_name: PR-Agent
repo_url: https://github.com/qodo-ai/pr-agent
repo_name: Qodo-ai/pr-agent

nav:
  - Overview:
    - 'index.md'
    - Data Privacy: 'overview/data_privacy.md'
  - Installation:
    - 'installation/index.md'
    - PR-Agent: 'installation/pr_agent.md'
  - Usage Guide:
    - 'usage-guide/index.md'
    - Introduction: 'usage-guide/introduction.md'
    - Configuration File: 'usage-guide/configuration_options.md'
    - Usage and Automation: 'usage-guide/automations_and_usage.md'
    - Managing Mail Notifications: 'usage-guide/mail_notifications.md'
    - Changing a Model: 'usage-guide/changing_a_model.md'
    - Additional Configurations: 'usage-guide/additional_configurations.md'
    - Frequently Asked Questions: 'faq/index.md'
  - Tools:
     - 'tools/index.md'
     - Describe: 'tools/describe.md'
     - Review: 'tools/review.md'
     - Improve: 'tools/improve.md'
     - Ask: 'tools/ask.md'
     - Add Docs: 'tools/add_docs.md'
     - Generate Labels: 'tools/generate_labels.md'
     - Similar Issues: 'tools/similar_issues.md'
     - Help: 'tools/help.md'
     - Help Docs: 'tools/help_docs.md'
     - Update Changelog: 'tools/update_changelog.md'
  - Core Abilities:
      - 'core-abilities/index.md'
      - Compression strategy: 'core-abilities/compression_strategy.md'
      - Dynamic context: 'core-abilities/dynamic_context.md'
      - Fetching ticket context: 'core-abilities/fetching_ticket_context.md'
      - Interactivity: 'core-abilities/interactivity.md'
      - Local and global metadata: 'core-abilities/metadata.md'
      - Self-reflection: 'core-abilities/self_reflection.md'
#  - Code Fine-tuning Benchmark: 'finetuning_benchmark/index.md'

# Material for MkDocs theme configuration: branding assets, enabled feature
# flags, language, template overrides directory and the color palettes.
theme:
  logo: assets/favicon.svg
  favicon: assets/favicon.svg
  name: material
  icon:
    repo: fontawesome/brands/github
  features:
    - navigation.tabs
    - navigation.expand
    - navigation.path
    - navigation.top
    - navigation.tracking
    - navigation.indexes
    - search.suggest
    - search.highlight
    - content.tabs.link
    # The Material feature flag for code annotations is `content.code.annotate`.
    - content.code.annotate
    - content.code.copy
    - announce.dismiss
  language: en
  custom_dir: overrides

  # Three palette entries: follow the system preference, explicit light, explicit dark.
  palette:
    - media: "(prefers-color-scheme)"
      toggle:
        icon: material/brightness-auto
        name: Switch to light mode
    - media: "(prefers-color-scheme: light)"
      scheme: default
      toggle:
        icon: material/toggle-switch-off-outline
        name: Switch to dark mode
      primary: custom
      accent: custom
    - media: "(prefers-color-scheme: dark)"
      scheme: slate
      toggle:
        icon: material/toggle-switch
        name: Switch to light mode
      primary: custom
      accent: custom

plugins:
  - social
  - search
  - glightbox

extra:
  generator: false
  social:
    - icon: fontawesome/brands/github
      link: https://github.com/qodo-ai/pr-agent

extra_css:
  - css/custom.css

markdown_extensions:
  - pymdownx.highlight:
      anchor_linenums: true
  - pymdownx.inlinehilite
  - pymdownx.snippets
  - admonition
  - pymdownx.arithmatex:
      generic: true
  - footnotes
  - pymdownx.details
  - pymdownx.superfences
  - pymdownx.mark
  - md_in_html
  - attr_list
  - pymdownx.emoji:
      emoji_index: !!python/name:material.extensions.emoji.twemoji
      emoji_generator: !!python/name:material.extensions.emoji.to_svg
  - pymdownx.tabbed:
      alternate_style: true
  - toc:
      title: On this page
      toc_depth: 3
      permalink: true


copyright: |
  &copy; 2026 PR-Agent Contributors


================================================
FILE: docs/overrides/main.html
================================================
{% extends "base.html" %}

{% block announce %}
  Open source PR Agent documentation. For the Qodo free version, Get Started: <a href="https://www.qodo.ai/get-started/">https://www.qodo.ai/get-started/</a>
{% endblock %}

{% block scripts %}
  {{ super() }}

    <!-- Google Tag Manager (noscript) -->
    <noscript><iframe src="https://www.googletagmanager.com/ns.html?id=GTM-5C9KZBM3"
    height="0" width="0" style="display:none;visibility:hidden"></iframe></noscript>
    <!-- End Google Tag Manager (noscript) -->
{% endblock %}


================================================
FILE: docs/overrides/partials/footer.html
================================================
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Footer</title>
<style>
  body {
    margin: 0;
    padding: 0;
    font-family: Arial, sans-serif;
    font-size: 16px;
  }

  .wrapper {
    background-color: #1a202c;
  }

  .container {
    display: flex;
    flex-direction: row;
    align-items: center;
    justify-content: space-between;
    color: white;
    padding: 20px;
    max-width: 61rem;
    margin-left: auto;
    margin-right: auto;
  }

  .footer-links, .social-icons {
    padding: 0;
    list-style-type: none;
    display: flex;
    justify-content: center;
    gap: 20px;
    align-items: center;
  }

  .footer-links a:hover, .social-icons a:hover {
    color: #a0aec0;
  }

  .social-icons svg {
    width: 24px;
    height: auto;
    fill: white;
  }

  .footer-text {
    width: 240px;
  }

  @media (max-width: 768px) {
    .container {
      flex-direction: column;
      align-items: center;
      text-align: center;
    }

    .footer-links, .social-icons, .footer-text {
      width: 100%;
      justify-content: center;
      margin: 10px 0;
    }

    .footer-links {
      order: 1;
    }

    .social-icons {
      order: 2;
    }

    .footer-text {
      order: 3;
    }
  }
</style>
</head>
<body>

<footer class="wrapper">
  <div class="container">
    <p class="footer-text">&copy; 2026 PR-Agent Contributors</p>
    <div class="footer-links">
      <a href="https://github.com/qodo-ai/pr-agent">GitHub</a>
    </div>
    <div class="social-icons">
      <a href="https://github.com/qodo-ai/pr-agent" target="_blank" rel="noopener" title="github.com" class="social-link">
        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 496 512"><!--! Font Awesome Free 6.5.1 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2023 Fonticons, Inc.--><path d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z"></path></svg>
      </a>
    </div>
  </div>
</footer>

</body>
</html>


================================================
FILE: docs/overrides/partials/integrations/analytics/custom.html
================================================
<!-- Google Tag Manager -->
<script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':
    new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],
    j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src=
    'https://www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);
    })(window,document,'script','dataLayer','GTM-5C9KZBM3');</script>
    <!-- End Google Tag Manager -->


================================================
FILE: github_action/entrypoint.sh
================================================
#!/bin/bash
# Entrypoint script: launches the PR-Agent GitHub Action runner.
# `exec` replaces this shell with the Python process, so the runner receives
# signals (e.g. when the job is cancelled) directly instead of them being
# delivered to an intermediate bash process, and its exit status is the
# script's exit status.
exec python /app/pr_agent/servers/github_action_runner.py


================================================
FILE: pr_agent/__init__.py
================================================


================================================
FILE: pr_agent/agent/__init__.py
================================================


================================================
FILE: pr_agent/agent/pr_agent.py
================================================
import shlex
from functools import partial

from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
from pr_agent.algo.cli_args import CliArgs
from pr_agent.algo.utils import update_settings_from_args
from pr_agent.config_loader import get_settings
from pr_agent.git_providers.utils import apply_repo_settings
from pr_agent.log import get_logger
from pr_agent.tools.pr_add_docs import PRAddDocs
from pr_agent.tools.pr_code_suggestions import PRCodeSuggestions
from pr_agent.tools.pr_config import PRConfig
from pr_agent.tools.pr_description import PRDescription
from pr_agent.tools.pr_generate_labels import PRGenerateLabels
from pr_agent.tools.pr_help_docs import PRHelpDocs
from pr_agent.tools.pr_help_message import PRHelpMessage
from pr_agent.tools.pr_line_questions import PR_LineQuestions
from pr_agent.tools.pr_questions import PRQuestions
from pr_agent.tools.pr_reviewer import PRReviewer
from pr_agent.tools.pr_similar_issue import PRSimilarIssue
from pr_agent.tools.pr_update_changelog import PRUpdateChangelog

# Maps every supported command keyword (aliases included) to the tool class that
# handles it. Aliases of the same tool are grouped together; the insertion order
# is significant because it defines the order of the `commands` list below.
command2class = {
    **dict.fromkeys(("auto_review", "answer", "review", "review_pr"), PRReviewer),
    **dict.fromkeys(("describe", "describe_pr"), PRDescription),
    **dict.fromkeys(("improve", "improve_code"), PRCodeSuggestions),
    **dict.fromkeys(("ask", "ask_question"), PRQuestions),
    "ask_line": PR_LineQuestions,
    "update_changelog": PRUpdateChangelog,
    **dict.fromkeys(("config", "settings"), PRConfig),
    "help": PRHelpMessage,
    "similar_issue": PRSimilarIssue,
    "add_docs": PRAddDocs,
    "generate_labels": PRGenerateLabels,
    "help_docs": PRHelpDocs,
}

# All recognized command keywords, in the order they are declared above.
commands = [*command2class]


class PRAgent:
    """
    Dispatcher that turns a command (a raw string or a pre-split sequence) into a run of
    the matching tool from ``command2class``.
    """

    def __init__(self, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
        self.ai_handler = ai_handler  # will be initialized in run_action

    async def _handle_request(self, pr_url, request, notify=None) -> bool:
        """
        Parse ``request``, apply settings overrides and run the requested tool.

        Args:
            pr_url: URL handed to ``apply_repo_settings`` and to the tool being run.
            request: Either a command string (tokenized shell-style) or an iterable whose
                first item is the command and whose remaining items are its arguments.
            notify: Optional zero-argument callable invoked right before the tool runs
                (it is not invoked for ``auto_review``).

        Returns:
            bool: True when a tool was run; False when the command is empty, unknown, or
            carries a forbidden CLI argument.
        """
        # First, apply repo specific settings if exists
        apply_repo_settings(pr_url)

        # Then, apply user specific settings if exists
        if isinstance(request, str):
            # Escape single quotes so apostrophes in free text do not open a shell-style quote
            request = request.replace("'", "\\'")
            lexer = shlex.shlex(request, posix=True)
            lexer.whitespace_split = True
            tokens = list(lexer)
        else:
            tokens = list(request)

        # An empty command would otherwise fail on unpacking with an opaque ValueError
        if not tokens:
            get_logger().warning("Empty command received, nothing to run.")
            return False
        action, *args = tokens

        # validate args
        is_valid, arg = CliArgs.validate_user_args(args)
        if not is_valid:
            get_logger().error(
                f"CLI argument for param '{arg}' is forbidden. Use instead a configuration file."
            )
            return False

        # Update settings from args
        args = update_settings_from_args(args)

        # Append the response language in the extra instructions
        response_language = get_settings().config.get('response_language', 'en-us')
        if response_language.lower() != 'en-us':
            get_logger().info(f'User has set the response language to: {response_language}')

            # Both strings are identical for every settings section, so build them once
            lang_instruction_text = f"Your response MUST be written in the language corresponding to locale code: '{response_language}'. This is crucial."
            separator_text = "\n======\n\nIn addition, "

            for key in get_settings():
                setting = get_settings().get(key)
                # Only settings sections (DynaBox objects) can carry extra_instructions
                if str(type(setting)) != "<class 'dynaconf.utils.boxing.DynaBox'>":
                    continue
                if not hasattr(setting, 'extra_instructions'):
                    continue

                current_extra_instructions = setting.extra_instructions
                # Skip sections that already contain the instruction to avoid duplication
                if lang_instruction_text in str(current_extra_instructions):
                    continue

                if current_extra_instructions:  # If there's existing text
                    setting.extra_instructions = str(current_extra_instructions) + separator_text + lang_instruction_text
                else:  # If extra_instructions was None or empty
                    setting.extra_instructions = lang_instruction_text

        action = action.lstrip("/").lower()
        if action not in command2class:
            get_logger().warning(f"Unknown command: {action}")
            return False

        with get_logger().contextualize(command=action, pr_url=pr_url):
            get_logger().info("PR-Agent request handler started", analytics=True)
            if action == "answer":
                if notify:
                    notify()
                await PRReviewer(pr_url, is_answer=True, args=args, ai_handler=self.ai_handler).run()
            elif action == "auto_review":
                await PRReviewer(pr_url, is_auto=True, args=args, ai_handler=self.ai_handler).run()
            else:
                # Membership in command2class was already verified above
                if notify:
                    notify()

                await command2class[action](pr_url, ai_handler=self.ai_handler, args=args).run()
            return True

    async def handle_request(self, pr_url, request, notify=None) -> bool:
        """
        Safe wrapper around ``_handle_request``: logs any failure and reports it as False.

        Only ``Exception`` subclasses are absorbed, so task cancellation
        (``asyncio.CancelledError``), ``KeyboardInterrupt`` and ``SystemExit`` still
        propagate to the caller instead of being silently turned into ``False``.
        """
        try:
            return await self._handle_request(pr_url, request, notify)
        except Exception:
            get_logger().exception("Failed to process the command.")
            return False


================================================
FILE: pr_agent/algo/__init__.py
================================================
# Token limit per model identifier. Keys are model names, most of them carrying a
# provider prefix such as 'vertex_ai/', 'bedrock/' or 'anthropic/'.
# As the inline notes say, the effective limit may additionally be capped by
# config.max_model_tokens.
MAX_TOKENS = {
    'text-embedding-ada-002': 8000,
    'gpt-3.5-turbo': 16000,
    'gpt-3.5-turbo-0125': 16000,
    'gpt-3.5-turbo-0613': 4000,
    'gpt-3.5-turbo-1106': 16000,
    'gpt-3.5-turbo-16k': 16000,
    'gpt-3.5-turbo-16k-0613': 16000,
    'gpt-4': 8000,
    'gpt-4-0613': 8000,
    'gpt-4-32k': 32000,
    'gpt-4-1106-preview': 128000,  # 128K, but may be limited by config.max_model_tokens
    'gpt-4-0125-preview': 128000,  # 128K, but may be limited by config.max_model_tokens
    'gpt-4o': 128000,  # 128K, but may be limited by config.max_model_tokens
    'gpt-4o-2024-05-13': 128000,  # 128K, but may be limited by config.max_model_tokens
    'gpt-4-turbo-preview': 128000,  # 128K, but may be limited by config.max_model_tokens
    'gpt-4-turbo-2024-04-09': 128000,  # 128K, but may be limited by config.max_model_tokens
    'gpt-4-turbo': 128000,  # 128K, but may be limited by config.max_model_tokens
    'gpt-4o-mini': 128000,  # 128K, but may be limited by config.max_model_tokens
    'gpt-4o-mini-2024-07-18': 128000,  # 128K, but may be limited by config.max_model_tokens
    'gpt-4o-2024-08-06': 128000,  # 128K, but may be limited by config.max_model_tokens
    'gpt-4o-2024-11-20': 128000,  # 128K, but may be limited by config.max_model_tokens
    'gpt-4.5-preview': 128000,  # 128K, but may be limited by config.max_model_tokens
    'gpt-4.5-preview-2025-02-27': 128000,  # 128K, but may be limited by config.max_model_tokens
    'gpt-4.1': 1047576,
    'gpt-4.1-2025-04-14': 1047576,
    'gpt-4.1-mini': 1047576,
    'gpt-4.1-mini-2025-04-14': 1047576,
    'gpt-4.1-nano': 1047576,
    'gpt-4.1-nano-2025-04-14': 1047576,
    'gpt-5-nano': 200000,  # 200K, but may be limited by config.max_model_tokens
    'gpt-5-mini': 200000,  # 200K, but may be limited by config.max_model_tokens
    'gpt-5': 200000,
    'gpt-5-2025-08-07': 200000,
    'gpt-5.1': 200000,
    'gpt-5.1-2025-11-13': 200000,
    'gpt-5.1-chat-latest': 200000,
    'gpt-5.1-codex': 200000,
    'gpt-5.1-codex-mini': 200000,
    'gpt-5.2': 400000,  # 400K, but may be limited by config.max_model_tokens
    'gpt-5.2-2025-12-11': 400000,  # 400K, but may be limited by config.max_model_tokens
    'gpt-5.2-chat-latest': 128000,  # 128K, but may be limited by config.max_model_tokens
    'gpt-5.2-codex': 400000,  # 400K, but may be limited by config.max_model_tokens
    'gpt-5.3-codex': 400000,  # 400K, but may be limited by config.max_model_tokens
    'gpt-5.4': 272000,  # 272K safe default without opt-in 1M context parameters
    'gpt-5.4-2026-03-05': 272000,  # 272K safe default without opt-in 1M context parameters
    'o1-mini': 128000,  # 128K, but may be limited by config.max_model_tokens
    'o1-mini-2024-09-12': 128000,  # 128K, but may be limited by config.max_model_tokens
    'o1-preview': 128000,  # 128K, but may be limited by config.max_model_tokens
    'o1-preview-2024-09-12': 128000,  # 128K, but may be limited by config.max_model_tokens
    'o1-2024-12-17': 204800,  # 200K, but may be limited by config.max_model_tokens
    'o1': 204800,  # 200K, but may be limited by config.max_model_tokens
    'o3-mini': 204800,  # 200K, but may be limited by config.max_model_tokens
    'o3-mini-2025-01-31': 204800,  # 200K, but may be limited by config.max_model_tokens
    'o3': 200000,  # 200K, but may be limited by config.max_model_tokens
    'o3-2025-04-16': 200000,  # 200K, but may be limited by config.max_model_tokens
    'o4-mini': 200000, # 200K, but may be limited by config.max_model_tokens
    'o4-mini-2025-04-16': 200000, # 200K, but may be limited by config.max_model_tokens
    'claude-instant-1': 100000,
    'claude-2': 100000,
    'command-nightly': 4096,
    'deepseek/deepseek-chat': 128000,  # 128K, but may be limited by config.max_model_tokens
    'deepseek/deepseek-reasoner': 64000,  # 64K, but may be limited by config.max_model_tokens
    'openai/qwq-plus': 131072,  # 131K context length, but may be limited by config.max_model_tokens
    'replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1': 4096,
    'meta-llama/Llama-2-7b-chat-hf': 4096,
    'vertex_ai/codechat-bison': 6144,
    'vertex_ai/codechat-bison-32k': 32000,
    'vertex_ai/claude-3-haiku@20240307': 100000,
    'vertex_ai/claude-3-5-haiku@20241022': 100000,
    'vertex_ai/claude-haiku-4-5@20251001': 200000,
    'vertex_ai/claude-3-sonnet@20240229': 100000,
    'vertex_ai/claude-3-opus@20240229': 100000,
    'vertex_ai/claude-opus-4@20250514': 200000,
    'vertex_ai/claude-opus-4-1@20250805': 200000,
    'vertex_ai/claude-opus-4-5@20251101': 200000,
    'vertex_ai/claude-opus-4-6@20260120': 200000,
    'vertex_ai/claude-opus-4-6': 200000,
    'vertex_ai/claude-3-5-sonnet@20240620': 100000,
    'vertex_ai/claude-3-5-sonnet-v2@20241022': 100000,
    'vertex_ai/claude-3-7-sonnet@20250219': 200000,
    'vertex_ai/claude-sonnet-4@20250514': 200000,
    'vertex_ai/claude-sonnet-4-5@20250929': 200000,
    'vertex_ai/claude-sonnet-4-6': 200000,
    'vertex_ai/gemini-1.5-pro': 1048576,
    'vertex_ai/gemini-2.5-pro-preview-03-25': 1048576,
    'vertex_ai/gemini-2.5-pro-preview-05-06': 1048576,
    'vertex_ai/gemini-2.5-pro-preview-06-05': 1048576,
    'vertex_ai/gemini-2.5-pro': 1048576,
    'vertex_ai/gemini-1.5-flash': 1048576,
    'vertex_ai/gemini-2.0-flash': 1048576,
    'vertex_ai/gemini-2.5-flash-preview-04-17': 1048576,
    'vertex_ai/gemini-2.5-flash-preview-05-20': 1048576,
    'vertex_ai/gemini-2.5-flash': 1048576,
    'vertex_ai/gemini-3-flash-preview': 1048576,
    'vertex_ai/gemini-3-pro-preview': 1048576,
    'vertex_ai/gemini-3.1-pro-preview': 1048576,
    'vertex_ai/gemma2': 8200,
    'gemini/gemini-1.5-pro': 1048576,
    'gemini/gemini-1.5-flash': 1048576,
    'gemini/gemini-2.0-flash': 1048576,
    'gemini/gemini-2.5-flash-preview-04-17': 1048576,
    'gemini/gemini-2.5-flash-preview-05-20': 1048576,
    'gemini/gemini-2.5-flash': 1048576,
    'gemini/gemini-2.5-pro-preview-03-25': 1048576,
    'gemini/gemini-2.5-pro-preview-05-06': 1048576,
    'gemini/gemini-2.5-pro-preview-06-05': 1048576,
    'gemini/gemini-2.5-pro': 1048576,
    'gemini/gemini-3-flash-preview': 1048576,
    'gemini/gemini-3-pro-preview': 1048576,
    'gemini/gemini-3.1-pro-preview': 1048576,
    'codechat-bison': 6144,
    'codechat-bison-32k': 32000,
    'anthropic.claude-instant-v1': 100000,
    'anthropic.claude-v1': 100000,
    'anthropic.claude-v2': 100000,
    'anthropic/claude-3-opus-20240229': 100000,
    'anthropic/claude-opus-4-20250514': 200000,
    'anthropic/claude-opus-4-1-20250805': 200000,
    'anthropic/claude-opus-4-5-20251101': 200000,
    'anthropic/claude-opus-4-6': 200000,
    'anthropic/claude-opus-4-6-20260120': 200000,
    'anthropic/claude-3-5-sonnet-20240620': 100000,
    'anthropic/claude-3-5-sonnet-20241022': 100000,
    'anthropic/claude-3-7-sonnet-20250219': 200000,
    'anthropic/claude-sonnet-4-20250514': 200000,
    'anthropic/claude-sonnet-4-5-20250929': 200000,
    'anthropic/claude-sonnet-4-6': 200000,
    'claude-opus-4-1-20250805': 200000,
    'claude-opus-4-5-20251101': 200000,
    'claude-opus-4-6': 200000,
    'claude-opus-4-6-20260120': 200000,
    'claude-3-7-sonnet-20250219': 200000,
    'claude-sonnet-4-6': 200000,
    'anthropic/claude-3-5-haiku-20241022': 100000,
    'anthropic/claude-haiku-4-5-20251001': 200000,
    'claude-haiku-4-5-20251001': 200000,
    'bedrock/anthropic.claude-instant-v1': 100000,
    'bedrock/anthropic.claude-v2': 100000,
    'bedrock/anthropic.claude-v2:1': 100000,
    'bedrock/anthropic.claude-3-sonnet-20240229-v1:0': 100000,
    'bedrock/anthropic.claude-opus-4-20250514-v1:0': 200000,
    'bedrock/anthropic.claude-opus-4-1-20250805-v1:0': 200000,
    'bedrock/anthropic.claude-opus-4-6-20260120-v1:0': 200000,
    'bedrock/anthropic.claude-opus-4-6-v1:0': 200000,
    'bedrock/anthropic.claude-3-haiku-20240307-v1:0': 100000,
    'bedrock/anthropic.claude-3-5-haiku-20241022-v1:0': 100000,
    'bedrock/anthropic.claude-haiku-4-5-20251001-v1:0': 200000,
    'bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0': 100000,
    'bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0': 100000,
    'bedrock/anthropic.claude-3-7-sonnet-20250219-v1:0': 200000,
    'bedrock/anthropic.claude-sonnet-4-20250514-v1:0': 200000,
    'bedrock/anthropic.claude-sonnet-4-5-20250929-v1:0': 200000,
    'bedrock/anthropic.claude-sonnet-4-6': 200000,
    "bedrock/us.anthropic.claude-opus-4-20250514-v1:0": 200000,
    "bedrock/us.anthropic.claude-opus-4-1-20250805-v1:0": 200000,
    "bedrock/us.anthropic.claude-opus-4-6-20260120-v1:0": 200000,
    "bedrock/global.anthropic.claude-opus-4-5-20251101-v1:0": 200000,
    "bedrock/us.anthropic.claude-opus-4-5-20251101-v1:0": 200000,
    "bedrock/global.anthropic.claude-opus-4-6-v1:0": 200000,
    "bedrock/us.anthropic.claude-opus-4-6-v1:0": 200000,
    "bedrock/us.anthropic.claude-3-5-sonnet-20241022-v2:0": 100000,
    "bedrock/us.anthropic.claude-haiku-4-5-20251001-v1:0": 200000,
    "bedrock/eu.anthropic.claude-haiku-4-5-20251001-v1:0": 200000,
    "bedrock/au.anthropic.claude-haiku-4-5-20251001-v1:0": 200000,
    "bedrock/jp.anthropic.claude-haiku-4-5-20251001-v1:0": 200000,
    "bedrock/apac.anthropic.claude-haiku-4-5-20251001-v1:0": 200000,
    "bedrock/global.anthropic.claude-haiku-4-5-20251001-v1:0": 200000,
    "bedrock/us.anthropic.claude-3-7-sonnet-20250219-v1:0": 200000,
    "bedrock/us.anthropic.claude-sonnet-4-20250514-v1:0": 200000,
    "bedrock/global.anthropic.claude-sonnet-4-20250514-v1:0": 200000,
    "bedrock/us.anthropic.claude-sonnet-4-5-20250929-v1:0": 200000,
    "bedrock/au.anthropic.claude-sonnet-4-5-20250929-v1:0": 200000,
    "bedrock/us.anthropic.claude-sonnet-4-6": 200000,
    "bedrock/au.anthropic.claude-sonnet-4-6": 200000,
    "bedrock/apac.anthropic.claude-3-5-sonnet-20241022-v2:0": 100000,
    "bedrock/apac.anthropic.claude-3-7-sonnet-20250219-v1:0": 200000,
    "bedrock/apac.anthropic.claude-sonnet-4-20250514-v1:0": 200000,
    "bedrock/eu.anthropic.claude-sonnet-4-5-20250929-v1:0": 200000,
    "bedrock/eu.anthropic.claude-sonnet-4-6": 200000,
    "bedrock/jp.anthropic.claude-sonnet-4-5-20250929-v1:0": 200000,
    "bedrock/jp.anthropic.claude-sonnet-4-6": 200000,
    "bedrock/global.anthropic.claude-sonnet-4-5-20250929-v1:0": 200000,
    "bedrock/global.anthropic.claude-sonnet-4-6": 200000,
    'claude-3-5-sonnet': 100000,
    'bedrock/us.meta.llama4-scout-17b-instruct-v1:0': 128000,
    'bedrock/us.meta.llama4-maverick-17b-instruct-v1:0': 128000,
    'groq/openai/gpt-oss-120b': 131072,
    'groq/openai/gpt-oss-20b': 131072,
    'groq/qwen/qwen3-32b': 131000,
    'groq/moonshotai/kimi-k2-instruct': 131072,
    'groq/deepseek-r1-distill-llama-70b': 128000,
    'groq/meta-llama/llama-4-maverick-17b-128e-instruct': 131072,
    'groq/meta-llama/llama-4-scout-17b-16e-instruct': 131072,
    'groq/llama-3.3-70b-versatile': 128000,
    'groq/llama-3.1-8b-instant': 128000,
    'xai/grok-2': 131072,
    'xai/grok-2-1212': 131072,
    'xai/grok-2-latest': 131072,
    'xai/grok-3': 131072,
    'xai/grok-3-beta': 131072,
    'xai/grok-3-fast': 131072,
    'xai/grok-3-fast-beta': 131072,
    'xai/grok-3-mini': 131072,
    'xai/grok-3-mini-beta': 131072,
    'xai/grok-3-mini-fast': 131072,
    'xai/grok-3-mini-fast-beta': 131072,
    'ollama/llama3': 4096,
    'watsonx/meta-llama/llama-3-8b-instruct': 4096,
    "watsonx/meta-llama/llama-3-70b-instruct": 4096,
    "watsonx/meta-llama/llama-3-405b-instruct": 16384,
    "watsonx/ibm/granite-13b-chat-v2": 8191,
    "watsonx/ibm/granite-34b-code-instruct": 8191,
    "watsonx/mistralai/mistral-large": 32768,
    "deepinfra/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": 128000,
    "deepinfra/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": 128000,
    "deepinfra/deepseek-ai/DeepSeek-R1": 128000,
    "mistral/mistral-small-latest": 8191,
    "mistral/mistral-medium-latest": 8191,
    "mistral/mistral-large-2407": 128000,
    "mistral/mistral-large-latest": 128000,
    "mistral/open-mistral-7b": 8191,
    "mistral/open-mixtral-8x7b": 8191,
    "mistral/open-mixtral-8x22b": 8191,
    "mistral/codestral-latest": 8191,
    "mistral/open-mistral-nemo": 128000,
    "mistral/open-mistral-nemo-2407": 128000,
    "mistral/open-codestral-mamba": 256000,
    "mistral/codestral-mamba-latest": 256000,
    "codestral/codestral-latest": 8191,
    "codestral/codestral-2405": 8191,
}

# Models that only use a user message
# NOTE(review): presumably the system prompt has to be folded into the user message for
# these models; the handling lives in the AI handler, confirm there.
USER_MESSAGE_ONLY_MODELS = [
    "deepseek/deepseek-reasoner",
    "o1-mini",
    "o1-mini-2024-09-12",
    "o1-preview"
]

# Models that do not support the temperature argument
NO_SUPPORT_TEMPERATURE_MODELS = [
    "deepseek/deepseek-reasoner",
    "o1-mini",
    "o1-mini-2024-09-12",
    "o1",
    "o1-2024-12-17",
    "o3-mini",
    "o3-mini-2025-01-31",
    "o1-preview",
    "o3",
    "o3-2025-04-16",
    "o4-mini",
    "o4-mini-2025-04-16",
    "gpt-5.1-codex",
    "gpt-5.1-codex-mini",
    "gpt-5.2-codex",
    "gpt-5.3-codex",
    "gpt-5-mini"
]

# Models that support a reasoning-effort setting
SUPPORT_REASONING_EFFORT_MODELS = [
    "o3-mini",
    "o3-mini-2025-01-31",
    "o3",
    "o3-2025-04-16",
    "o4-mini",
    "o4-mini-2025-04-16",
]

# Claude models that support extended thinking
CLAUDE_EXTENDED_THINKING_MODELS = [
    "anthropic/claude-3-7-sonnet-20250219",
    "claude-3-7-sonnet-20250219"
]

# Models that require streaming mode
STREAMING_REQUIRED_MODELS = [
    "openai/qwq-plus"
]


================================================
FILE: pr_agent/algo/ai_handlers/base_ai_handler.py
================================================
from abc import ABC, abstractmethod


class BaseAiHandler(ABC):
    """
    Abstract contract for the AI backends used by the PR Agent tools.

    A concrete handler has to provide a constructor, a ``deployment_id`` property and
    an asynchronous ``chat_completion`` method.
    """

    @abstractmethod
    def __init__(self):
        """Set up the handler. Concrete subclasses must supply their own constructor."""

    @property
    @abstractmethod
    def deployment_id(self):
        """Deployment identifier exposed by the concrete handler."""

    @abstractmethod
    async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2, img_path: str = None):
        """
        Request a chat completion from the AI model. Must be implemented by subclasses.

        Args:
            model (str): name of the model that should produce the completion
            system (str): system message string for the completion
            user (str): user message string for the completion
            temperature (float): sampling temperature for the completion
            img_path (str): optional image reference accompanying the prompt; a handler
                without image support may ignore it
        """


================================================
FILE: pr_agent/algo/ai_handlers/langchain_ai_handler.py
================================================
# LangChain is an optional dependency. Every LangChain import lives inside the guard below
# so that this module can still be imported when the packages are missing; the handler's
# constructor checks _LANGCHAIN_INSTALLED and raises ImportError at that point instead.
_LANGCHAIN_INSTALLED = False

try:
    from langchain_core.messages import HumanMessage, SystemMessage
    from langchain_core.runnables import Runnable
    from langchain_openai import AzureChatOpenAI, ChatOpenAI
    _LANGCHAIN_INSTALLED = True
except Exception:  # we don't enforce langchain as a dependency, so if it's not installed, just move on
    pass

import functools

import openai
from tenacity import retry, retry_if_exception_type, retry_if_not_exception_type, stop_after_attempt

from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.config_loader import get_settings
from pr_agent.log import get_logger

# Maximum number of attempts made by the retry policy on chat_completion
OPENAI_RETRIES = 5


class LangChainOpenAIHandler(BaseAiHandler):
    """
    AI handler that sends chat completions to OpenAI or Azure OpenAI through LangChain.

    LangChain is optional for the project; constructing this handler without it raises
    ImportError.
    """

    def __init__(self):
        """
        Raises:
            ImportError: if the LangChain packages could not be imported.
        """
        if not _LANGCHAIN_INSTALLED:
            error_msg = "LangChain is not installed. Please install it with `pip install langchain`."
            get_logger().error(error_msg)
            raise ImportError(error_msg)

        super().__init__()
        # Azure mode is selected by the OPENAI.API_TYPE setting (case-insensitive)
        self.azure = get_settings().get("OPENAI.API_TYPE", "").lower() == "azure"

    @property
    def deployment_id(self):
        """
        Returns the deployment ID for the OpenAI API.
        """
        return get_settings().get("OPENAI.DEPLOYMENT_ID", None)

    async def _create_chat_async(self, deployment_id=None):
        """
        Build the LangChain chat model object for the configured provider.

        Args:
            deployment_id: Azure deployment name; only used in Azure mode.

        Returns:
            An ``AzureChatOpenAI`` instance in Azure mode, otherwise a ``ChatOpenAI`` instance.

        Raises:
            ValueError: if a required OpenAI setting is missing from the configuration.
        """
        try:
            if self.azure:
                # Using Azure OpenAI service
                return AzureChatOpenAI(
                    openai_api_key=get_settings().openai.key,
                    openai_api_version=get_settings().openai.api_version,
                    azure_deployment=deployment_id,
                    azure_endpoint=get_settings().openai.api_base,
                )

            # Using standard OpenAI or other LLM services
            openai_api_base = get_settings().get("OPENAI.API_BASE", None)
            if not openai_api_base:
                # No custom endpoint configured: let the client use its default
                return ChatOpenAI(openai_api_key=get_settings().openai.key)
            return ChatOpenAI(
                openai_api_key=get_settings().openai.key,
                openai_api_base=openai_api_base
            )
        except AttributeError as e:
            # Handle configuration errors.
            # The default on getattr keeps this handler from raising a second
            # AttributeError when the exception object carries no `name` attribute.
            missing_name = getattr(e, "name", None)
            error_msg = f"OpenAI {missing_name} is required" if missing_name else str(e)
            get_logger().error(error_msg)
            raise ValueError(error_msg) from e

    @retry(
        retry=retry_if_exception_type(openai.APIError) & retry_if_not_exception_type(openai.RateLimitError),
        stop=stop_after_attempt(OPENAI_RETRIES),
    )
    async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2, img_path: str = None):
        """
        Run a chat completion through LangChain.

        The call is retried on ``openai.APIError`` (but not on ``openai.RateLimitError``),
        up to ``OPENAI_RETRIES`` attempts.

        Args:
            model (str): model name forwarded to the LLM call
            system (str): system message content
            user (str): user message content
            temperature (float): sampling temperature forwarded to the LLM call
            img_path (str): not supported by this handler; a warning is logged and the
                value is ignored

        Returns:
            tuple: ``(response_text, finish_reason)``; ``finish_reason`` is always
            ``"completed"`` here.
        """
        if img_path:
            get_logger().warning(f"Image path is not supported for LangChainOpenAIHandler. Ignoring image path: {img_path}")
        try:
            messages = [SystemMessage(content=system), HumanMessage(content=user)]
            llm = await self._create_chat_async(deployment_id=self.deployment_id)

            if not isinstance(llm, Runnable):
                error_message = (
                    f"The Langchain LLM object ({type(llm)}) does not implement the Runnable interface. "
                    f"Please update your Langchain library to the latest version or "
                    f"check your LLM configuration to support async calls. "
                    f"PR-Agent is designed to utilize Langchain's async capabilities."
                )
                get_logger().error(error_message)
                raise NotImplementedError(error_message)

            # Handle parameters based on LLM type
            if isinstance(llm, (ChatOpenAI, AzureChatOpenAI)):
                # OpenAI models support all parameters
                resp = await llm.ainvoke(
                    input=messages,
                    model=model,
                    temperature=temperature
                )
            else:
                # Other LLMs (like Gemini) only support input parameter
                get_logger().info(f"Using simplified ainvoke for {type(llm)}")
                resp = await llm.ainvoke(input=messages)

            finish_reason = "completed"
            return resp.content, finish_reason

        except openai.RateLimitError as e:
            get_logger().error(f"Rate limit error during LLM inference: {e}")
            raise
        except openai.APIError as e:
            get_logger().warning(f"Error during LLM inference: {e}")
            raise
        except Exception as e:
            get_logger().warning(f"Unknown error during LLM inference: {e}")
            # NOTE(review): this raises the APIError class without constructor arguments.
            # Recent openai releases appear to require message/request arguments, in which
            # case this would surface as a TypeError and bypass the retry policy - confirm.
            raise openai.APIError from e


================================================
FILE: pr_agent/algo/ai_handlers/litellm_ai_handler.py
================================================
import os
import litellm
import openai
import requests
from litellm import acompletion
from tenacity import retry, retry_if_exception_type, retry_if_not_exception_type, stop_after_attempt

from pr_agent.algo import CLAUDE_EXTENDED_THINKING_MODELS, NO_SUPPORT_TEMPERATURE_MODELS, SUPPORT_REASONING_EFFORT_MODELS, USER_MESSAGE_ONLY_MODELS, STREAMING_REQUIRED_MODELS
from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.algo.ai_handlers.litellm_helpers import _handle_streaming_response, MockResponse, _get_azure_ad_token, \
    _process_litellm_extra_body
from pr_agent.algo.utils import ReasoningEffort, get_version
from pr_agent.config_loader import get_settings
from pr_agent.log import get_logger
import json

# Retry budget for model calls.
# NOTE(review): presumably consumed by the retry policy on the completion call further
# down in this module - confirm.
MODEL_RETRIES = 2


class LiteLLMAIHandler(BaseAiHandler):
    """
    This class handles interactions with the OpenAI API for chat completions.
    It initializes the API key and other settings from a configuration file,
    and provides a method for performing chat completions using the OpenAI ChatCompletion API.
    """

    def __init__(self):
        """
        Configure litellm / openai module-level state and provider environment variables
        from the application settings.

        Each provider section is optional and is applied only when its setting is present.
        The checks run in a fixed order, and several of them write the same global
        (``litellm.api_key``, ``litellm.api_base``, ``self.api_base``), so a later provider
        overrides an earlier one when more than one is configured.

        A missing OpenAI key does not raise: when neither the ``OPENAI.KEY`` setting nor the
        ``OPENAI_API_KEY`` environment variable is present, a placeholder key is installed.

        Raises:
            AssertionError: if an AWS access key id is configured without the matching
                secret access key and region name.
        """
        self.azure = False
        self.api_base = None
        self.repetition_penalty = None

        if get_settings().get("LITELLM.DISABLE_AIOHTTP", False):
            litellm.disable_aiohttp_transport = True
        if get_settings().get("OPENAI.KEY", None):
            openai.api_key = get_settings().openai.key
            litellm.openai_key = get_settings().openai.key
        elif 'OPENAI_API_KEY' not in os.environ:
            # No OpenAI key from settings or environment: install a placeholder
            litellm.api_key = "dummy_key"
        if get_settings().get("aws.AWS_ACCESS_KEY_ID"):
            assert get_settings().aws.AWS_SECRET_ACCESS_KEY and get_settings().aws.AWS_REGION_NAME, "AWS credentials are incomplete"
            # AWS credentials are exported through the process environment
            os.environ["AWS_ACCESS_KEY_ID"] = get_settings().aws.AWS_ACCESS_KEY_ID
            os.environ["AWS_SECRET_ACCESS_KEY"] = get_settings().aws.AWS_SECRET_ACCESS_KEY
            os.environ["AWS_REGION_NAME"] = get_settings().aws.AWS_REGION_NAME
        if get_settings().get("LITELLM.DROP_PARAMS", None):
            litellm.drop_params = get_settings().litellm.drop_params
        if get_settings().get("LITELLM.SUCCESS_CALLBACK", None):
            litellm.success_callback = get_settings().litellm.success_callback
        if get_settings().get("LITELLM.FAILURE_CALLBACK", None):
            litellm.failure_callback = get_settings().litellm.failure_callback
        if get_settings().get("LITELLM.SERVICE_CALLBACK", None):
            litellm.service_callback = get_settings().litellm.service_callback
        if get_settings().get("OPENAI.ORG", None):
            litellm.organization = get_settings().openai.org
        if get_settings().get("OPENAI.API_TYPE", None):
            if get_settings().openai.api_type == "azure":
                # Azure OpenAI reuses the OPENAI.KEY setting as its key
                self.azure = True
                litellm.azure_key = get_settings().openai.key
        if get_settings().get("OPENAI.API_VERSION", None):
            litellm.api_version = get_settings().openai.api_version
        if get_settings().get("OPENAI.API_BASE", None):
            litellm.api_base = get_settings().openai.api_base
            self.api_base = get_settings().openai.api_base
        if get_settings().get("ANTHROPIC.KEY", None):
            litellm.anthropic_key = get_settings().anthropic.key
        if get_settings().get("COHERE.KEY", None):
            litellm.cohere_key = get_settings().cohere.key
        if get_settings().get("GROQ.KEY", None):
            # Groq writes the shared litellm.api_key (replaces any value set above)
            litellm.api_key = get_settings().groq.key
        if get_settings().get("REPLICATE.KEY", None):
            litellm.replicate_key = get_settings().replicate.key
        if get_settings().get("XAI.KEY", None):
            # xAI also writes the shared litellm.api_key (replaces a Groq key if both are set)
            litellm.api_key = get_settings().xai.key
        if get_settings().get("HUGGINGFACE.KEY", None):
            litellm.huggingface_key = get_settings().huggingface.key
        # The HuggingFace API base is applied only when the configured model name contains 'huggingface'
        if get_settings().get("HUGGINGFACE.API_BASE", None) and 'huggingface' in get_settings().config.model:
            litellm.api_base = get_settings().huggingface.api_base
            self.api_base = get_settings().huggingface.api_base
        if get_settings().get("OLLAMA.API_BASE", None):
            litellm.api_base = get_settings().ollama.api_base
            self.api_base = get_settings().ollama.api_base
        if get_settings().get("HUGGINGFACE.REPETITION_PENALTY", None):
            self.repetition_penalty = float(get_settings().huggingface.repetition_penalty)
        if get_settings().get("VERTEXAI.VERTEX_PROJECT", None):
            litellm.vertex_project = get_settings().vertexai.vertex_project
            litellm.vertex_location = get_settings().get(
                "VERTEXAI.VERTEX_LOCATION", None
            )
        # Google AI Studio
        # SEE https://docs.litellm.ai/docs/providers/gemini
        if get_settings().get("GOOGLE_AI_STUDIO.GEMINI_API_KEY", None):
          os.environ["GEMINI_API_KEY"] = get_settings().google_ai_studio.gemini_api_key

        # Support deepseek models
        if get_settings().get("DEEPSEEK.KEY", None):
            os.environ['DEEPSEEK_API_KEY'] = get_settings().get("DEEPSEEK.KEY")

        # Support deepinfra models
        if get_settings().get("DEEPINFRA.KEY", None):
            os.environ['DEEPINFRA_API_KEY'] = get_settings().get("DEEPINFRA.KEY")

        # Support mistral models
        if get_settings().get("MISTRAL.KEY", None):
            os.environ["MISTRAL_API_KEY"] = get_settings().get("MISTRAL.KEY")

        # Support codestral models
        if get_settings().get("CODESTRAL.KEY", None):
            os.environ["CODESTRAL_API_KEY"] = get_settings().get("CODESTRAL.KEY")

        # Check for Azure AD configuration
        if get_settings().get("AZURE_AD.CLIENT_ID", None):
            self.azure = True
            # Generate access token using Azure AD credentials from settings
            access_token = _get_azure_ad_token()
            # The token replaces any API key assigned earlier in this constructor
            litellm.api_key = access_token
            openai.api_key = access_token

            # Set API base from settings
            self.api_base = get_settings().azure_ad.api_base
            litellm.api_base = self.api_base
            openai.api_base = self.api_base

        # Support for Openrouter models
        if get_settings().get("OPENROUTER.KEY", None):
            openrouter_api_key = get_settings().get("OPENROUTER.KEY", None)
            os.environ["OPENROUTER_API_KEY"] = openrouter_api_key
            # Checked last, so an OpenRouter key/base wins over every provider above
            litellm.api_key = openrouter_api_key
            openai.api_key = openrouter_api_key

            openrouter_api_base = get_settings().get("OPENROUTER.API_BASE", "https://openrouter.ai/api/v1")
            os.environ["OPENROUTER_API_BASE"] = openrouter_api_base
            self.api_base = openrouter_api_base
            litellm.api_base = openrouter_api_base

        # Models that only use user message
        self.user_message_only_models = USER_MESSAGE_ONLY_MODELS

        # Model that doesn't support temperature argument
        self.no_support_temperature_models = NO_SUPPORT_TEMPERATURE_MODELS

        # Models that support reasoning effort
        self.support_reasoning_models = SUPPORT_REASONING_EFFORT_MODELS

        # Models that support extended thinking
        self.claude_extended_thinking_models = CLAUDE_EXTENDED_THINKING_MODELS

        # Models that require streaming
        self.streaming_required_models = STREAMING_REQUIRED_MODELS

    def prepare_logs(self, response, system, user, resp, finish_reason):
        """
        Assemble a loggable dict from a model response and the prompts that produced it.

        Args:
            response: response object exposing a ``dict()`` method
            system: system prompt that was sent
            user: user prompt that was sent
            resp: text produced by the model (stored under ``output``)
            finish_reason: finish reason reported for the completion

        Returns:
            dict: a copy of ``response.dict()`` extended with ``system``, ``user``,
            ``output``, ``finish_reason`` and ``main_pr_language`` (``'unknown'`` when
            the handler has no such attribute).
        """
        # Work on a copy so the extra keys never land in the dict returned by the response
        log_payload = response.dict().copy()
        log_payload.update(
            system=system,
            user=user,
            output=resp,
            finish_reason=finish_reason,
            main_pr_language=getattr(self, 'main_pr_language', 'unknown'),
        )
        return log_payload

    def _configure_claude_extended_thinking(self, model: str, kwargs: dict) -> dict:
        """
        Add Claude extended-thinking parameters to the model call arguments.

        Reads ``extended_thinking_budget_tokens`` (default 2048) and
        ``extended_thinking_max_output_tokens`` (default 4096) from the config section,
        validates them, and then sets ``thinking``, ``max_tokens`` and ``temperature``
        on ``kwargs``.

        Args:
            model (str): The AI model being used (only used in log messages)
            kwargs (dict): The keyword arguments for the model call; modified in place

        Returns:
            dict: The same ``kwargs`` object, updated with extended thinking configuration

        Raises:
            ValueError: if either setting is not a positive integer, or if the max output
                tokens value is smaller than the thinking budget.
        """
        config = get_settings().config
        budget = config.get("extended_thinking_budget_tokens", 2048)
        max_output = config.get("extended_thinking_max_output_tokens", 4096)

        # Validate each value on its own first, then the relation between the two
        for option_name, value in (
            ("extended_thinking_budget_tokens", budget),
            ("extended_thinking_max_output_tokens", max_output),
        ):
            if not (isinstance(value, int) and value > 0):
                raise ValueError(f"{option_name} must be a positive integer, got {value}")
        if max_output < budget:
            raise ValueError(f"extended_thinking_max_output_tokens ({max_output}) must be greater than or equal to extended_thinking_budget_tokens ({budget})")

        kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}

        verbose = config.verbosity_level >= 2
        if verbose:
            get_logger().info(f"Adding max output tokens {max_output} to model {model}, extended thinking budget tokens: {budget}")
        kwargs["max_tokens"] = max_output

        # temperature may only be set to 1 when thinking is enabled
        if verbose:
            get_logger().info("Temperature may only be set to 1 when thinking is enabled with claude models.")
        kwargs["temperature"] = 1

        return kwargs

    def add_litellm_callbacks(self, kwargs) -> dict:
        """
        Attach tracing metadata for the configured litellm callbacks to ``kwargs``.

        The current logging context (``command`` and ``pr_url`` from the logger's
        ``extra``) is read by temporarily registering a sink, emitting one debug
        message, and removing the sink again.

        Args:
            kwargs: The keyword arguments for the model call; updated in place.

        Returns:
            dict: The same kwargs object with ``kwargs["metadata"]`` set. The metadata is
            populated for "langfuse" and/or "langsmith" when they appear among the
            litellm success/failure/service callbacks, and is an empty dict otherwise.
        """
        captured_extra = []

        def capture_logs(message):
            # Parsing the log message and context
            record = message.record
            # Treat a missing/None 'extra' as empty instead of failing on .get()
            extra = record.get('extra', None) or {}
            log_entry = {}
            if extra.get('command', None) is not None:
                log_entry.update({"command": extra["command"]})
            if extra.get('pr_url', None) is not None:
                log_entry.update({"pr_url": extra["pr_url"]})

            # Append the log entry to the captured_logs list
            captured_extra.append(log_entry)

        # Adding the custom sink to Loguru; always remove it again, even if the probe log fails
        handler_id = get_logger().add(capture_logs)
        try:
            get_logger().debug("Capturing logs for litellm callbacks")
        finally:
            get_logger().remove(handler_id)

        # Fall back to an empty context so the lookups below yield "unknown"
        # rather than raising AttributeError on None when nothing was captured.
        context = captured_extra[0] if len(captured_extra) > 0 else {}

        command = context.get("command", "unknown")
        pr_url = context.get("pr_url", "unknown")
        git_provider = get_settings().config.git_provider

        metadata = dict()
        callbacks = litellm.success_callback + litellm.failure_callback + litellm.service_callback
        if "langfuse" in callbacks:
            metadata.update({
                "trace_name": command,
                "tags": [git_provider, command, f'version:{get_version()}'],
                "trace_metadata": {
                    "command": command,
                    "pr_url": pr_url,
                },
            })
        if "langsmith" in callbacks:
            metadata.update({
                "run_name": command,
                "tags": [git_provider, command, f'version:{get_version()}'],
                "extra": {
                    "metadata": {
                        "command": command,
                        "pr_url": pr_url,
                    }
                },
            })

        # Adding the captured logs to the kwargs
        kwargs["metadata"] = metadata

        return kwargs

    @property
    def deployment_id(self):
        """
        Deployment ID read from the ``OPENAI.DEPLOYMENT_ID`` setting, or None when it is not set.
        """
        configured_deployment_id = get_settings().get("OPENAI.DEPLOYMENT_ID", None)
        return configured_deployment_id

    @retry(
        retry=retry_if_exception_type(openai.APIError) & retry_if_not_exception_type(openai.RateLimitError),
        stop=stop_after_attempt(MODEL_RETRIES),
    )
    async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2, img_path: str = None):
        """
        Send a system/user prompt pair to the model and return its answer.

        Args:
            model (str): Model name. Prefixed with 'azure/' when ``self.azure`` is set; names
                starting with 'gpt-5' are rewritten to 'openai/<name>' without '_thinking'.
            system (str): System prompt.
            user (str): User prompt.
            temperature (float): Sampling temperature; only sent when the model supports it.
            img_path (str): Optional image URL attached to the user message.

        Returns:
            tuple: ``(response_text, finish_reason)``. When the image link is dead or cannot
            be checked, ``(error_message, "error")`` is returned without calling the model.

        Raises:
            openai.RateLimitError: Re-raised unchanged (excluded from the retry predicate).
            openai.APIError: Re-raised unchanged; any other exception raised while preparing
                or running the call is re-raised through ``raise openai.APIError from e``.
        """
        try:
            resp, finish_reason = None, None
            deployment_id = self.deployment_id
            if self.azure:
                model = 'azure/' + model
            if 'claude' in model and not system:
                system = "No system prompt provided"
                get_logger().warning(
                    "Empty system prompt for claude model. Using a placeholder system prompt to prevent an API error.")
            messages = [{"role": "system", "content": system}, {"role": "user", "content": user}]

            if img_path:
                try:
                    # check if the image link is alive
                    r = requests.head(img_path, allow_redirects=True)
                    if r.status_code == 404:
                        error_msg = f"The image link is not [alive]({img_path}).\nPlease repost the original image as a comment, and send the question again with 'quote reply' (see [instructions](https://pr-agent-docs.codium.ai/tools/ask/#ask-on-images-using-the-pr-code-as-context))."
                        get_logger().error(error_msg)
                        return f"{error_msg}", "error"
                except Exception as e:
                    # Put the exception in the message itself: a positional argument without a
                    # matching placeholder would never be rendered.
                    get_logger().error(f"Error fetching image: {img_path}. Error: {e}")
                    return f"Error fetching image: {img_path}", "error"
                messages[1]["content"] = [{"type": "text", "text": messages[1]["content"]},
                                          {"type": "image_url", "image_url": {"url": img_path}}]

            thinking_kwargs_gpt5 = None
            if model.startswith('gpt-5'):
                # Use configured reasoning_effort or default to MEDIUM
                config_effort = get_settings().config.reasoning_effort
                try:
                    ReasoningEffort(config_effort)
                    effort = config_effort
                except (ValueError, TypeError):
                    effort = ReasoningEffort.MEDIUM.value
                    if config_effort is not None:
                        get_logger().warning(
                            f"Invalid reasoning_effort '{config_effort}' in config. "
                            f"Using default '{effort}'. Valid values: {[e.value for e in ReasoningEffort]}"
                        )

                thinking_kwargs_gpt5 = {
                    "reasoning_effort": effort,
                    "allowed_openai_params": ["reasoning_effort"],
                }
                get_logger().info(f"Using reasoning_effort='{effort}' for GPT-5 model")
                model = 'openai/'+model.replace('_thinking', '')  # remove _thinking suffix

            # Currently, some models do not support a separate system and user prompts
            if model in self.user_message_only_models or get_settings().config.custom_reasoning_model:
                user = f"{system}\n\n\n{user}"
                system = ""
                get_logger().info(f"Using model {model}, combining system and user prompts")
                messages = [{"role": "user", "content": user}]

            # The call arguments are the same in both cases; only `messages` differs
            kwargs = {
                "model": model,
                "deployment_id": deployment_id,
                "messages": messages,
                "timeout": get_settings().config.ai_timeout,
                "api_base": self.api_base,
            }

            # Add temperature only if model supports it
            if model not in self.no_support_temperature_models and not get_settings().config.custom_reasoning_model:
                # get_logger().info(f"Adding temperature with value {temperature} to model {model}.")
                kwargs["temperature"] = temperature

            if thinking_kwargs_gpt5:
                kwargs.update(thinking_kwargs_gpt5)
                # temperature is not sent together with the GPT-5 reasoning parameters
                if 'temperature' in kwargs:
                    del kwargs['temperature']

            # Add reasoning_effort if model supports it
            if model in self.support_reasoning_models:
                config_effort = get_settings().config.reasoning_effort
                try:
                    ReasoningEffort(config_effort)
                    reasoning_effort = config_effort
                except (ValueError, TypeError):
                    reasoning_effort = ReasoningEffort.MEDIUM.value
                    if config_effort is not None:
                        get_logger().warning(
                            f"Invalid reasoning_effort '{config_effort}' in config. "
                            f"Using default '{reasoning_effort}'. Valid values: {[e.value for e in ReasoningEffort]}"
                        )

                get_logger().info(f"Adding reasoning_effort with value {reasoning_effort} to model {model}.")
                kwargs["reasoning_effort"] = reasoning_effort

            # https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
            if (model in self.claude_extended_thinking_models) and get_settings().config.get("enable_claude_extended_thinking", False):
                kwargs = self._configure_claude_extended_thinking(model, kwargs)

            if get_settings().litellm.get("enable_callbacks", False):
                kwargs = self.add_litellm_callbacks(kwargs)

            seed = get_settings().config.get("seed", -1)
            if temperature > 0 and seed >= 0:
                raise ValueError(f"Seed ({seed}) is not supported with temperature ({temperature}) > 0")
            elif seed >= 0:
                get_logger().info(f"Using fixed seed of {seed}")
                kwargs["seed"] = seed

            if self.repetition_penalty:
                kwargs["repetition_penalty"] = self.repetition_penalty

            # Support for extra_headers when litellm calls the underlying model through an API
            # management gateway: allows passing custom headers for security and authorization
            if get_settings().get("LITELLM.EXTRA_HEADERS", None):
                try:
                    litellm_extra_headers = json.loads(get_settings().litellm.extra_headers)
                    if not isinstance(litellm_extra_headers, dict):
                        raise ValueError("LITELLM.EXTRA_HEADERS must be a JSON object")
                except json.JSONDecodeError as e:
                    raise ValueError(f"LITELLM.EXTRA_HEADERS contains invalid JSON: {str(e)}")
                kwargs["extra_headers"] = litellm_extra_headers

            # Support for custom OpenAI body fields (e.g., Flex Processing)
            kwargs = _process_litellm_extra_body(kwargs)

            # Support for Bedrock custom inference profile via model_id
            model_id = get_settings().get("litellm.model_id")
            if model_id and 'bedrock/' in model:
                kwargs["model_id"] = model_id
                get_logger().info(f"Using Bedrock custom inference profile: {model_id}")

            get_logger().debug("Prompts", artifact={"system": system, "user": user})

            if get_settings().config.verbosity_level >= 2:
                get_logger().info(f"\nSystem prompt:\n{system}")
                get_logger().info(f"\nUser prompt:\n{user}")

            # Get completion with automatic streaming detection
            resp, finish_reason, response_obj = await self._get_completion(**kwargs)

        except openai.RateLimitError as e:
            get_logger().error(f"Rate limit error during LLM inference: {e}")
            raise
        except openai.APIError as e:
            get_logger().warning(f"Error during LLM inference: {e}")
            raise
        except Exception as e:
            get_logger().warning(f"Unknown error during LLM inference: {e}")
            raise openai.APIError from e

        get_logger().debug(f"\nAI response:\n{resp}")

        # log the full response for debugging
        response_log = self.prepare_logs(response_obj, system, user, resp, finish_reason)
        get_logger().debug("Full_response", artifact=response_log)

        # for CLI debugging
        if get_settings().config.verbosity_level >= 2:
            get_logger().info(f"\nAI response:\n{resp}")

        return resp, finish_reason

    async def _get_completion(self, **kwargs):
        """
        Run the completion call, switching to streaming for models that require it.

        Returns:
            tuple: ``(content, finish_reason, response_object)``. For streaming models the
            third element is a ``MockResponse`` built from the collected content, because
            no full response object is available.
        """
        model = kwargs["model"]

        if model not in self.streaming_required_models:
            # Regular (non-streaming) call: answer is read from the first choice
            response = await acompletion(**kwargs)
            if response is None or len(response["choices"]) == 0:
                raise openai.APIError
            first_choice = response["choices"][0]
            return first_choice['message']['content'], first_choice["finish_reason"], response

        kwargs["stream"] = True
        get_logger().info(f"Using streaming mode for model {model}")
        stream = await acompletion(**kwargs)
        resp, finish_reason = await _handle_streaming_response(stream)
        # Wrap the collected text so callers can log it like a normal response
        return resp, finish_reason, MockResponse(resp, finish_reason)


================================================
FILE: pr_agent/algo/ai_handlers/litellm_helpers.py
================================================
import json

import openai

from pr_agent.config_loader import get_settings
from pr_agent.log import get_logger


async def _handle_streaming_response(response):
    """
    Handle streaming response from acompletion and collect the full response.

    Args:
        response: The streaming response object from acompletion

    Returns:
        tuple: (full_response_content, finish_reason)
    """
    full_response = ""
    finish_reason = None

    try:
        async for chunk in response:
            if chunk.choices and len(chunk.choices) > 0:
                choice = chunk.choices[0]
                delta = choice.delta
                content = getattr(delta, 'content', None)
                if content:
                    full_response += content
                if choice.finish_reason:
                    finish_reason = choice.finish_reason
    except Exception as e:
        get_logger().error(f"Error handling streaming response: {e}")
        raise

    if not full_response and finish_reason is None:
        get_logger().warning("Streaming response resulted in empty content with no finish reason")
        raise openai.APIError("Empty streaming response received without proper completion")
    elif not full_response and finish_reason:
        get_logger().debug(f"Streaming response resulted in empty content but completed with finish_reason: {finish_reason}")
        raise openai.APIError(f"Streaming response completed with finish_reason '{finish_reason}' but no content received")
    return full_response, finish_reason


class MockResponse:
    """Stand-in response for streaming models, exposing ``dict()`` so logging stays uniform."""

    def __init__(self, resp, finish_reason):
        # Single choice holding the collected text and the reported finish reason
        only_choice = {
            "message": {"content": resp},
            "finish_reason": finish_reason,
        }
        self._data = {"choices": [only_choice]}

    def dict(self):
        """Return the stored response dictionary (the same object on every call)."""
        return self._data


def _get_azure_ad_token():
    """
    Request an access token using the Azure AD client credentials from settings.

    Reads ``tenant_id``, ``client_id`` and ``client_secret`` from the ``azure_ad``
    settings section and asks for the ``https://cognitiveservices.azure.com/.default`` scope.

    Returns:
        str: The access token

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    from azure.identity import ClientSecretCredential
    try:
        azure_ad_settings = get_settings().azure_ad
        credential = ClientSecretCredential(
            tenant_id=azure_ad_settings.tenant_id,
            client_id=azure_ad_settings.client_id,
            client_secret=azure_ad_settings.client_secret,
        )
        # Get token for Azure OpenAI service
        access_token = credential.get_token("https://cognitiveservices.azure.com/.default")
        return access_token.token
    except Exception as e:
        get_logger().error(f"Failed to get Azure AD token: {e}")
        raise


def _process_litellm_extra_body(kwargs: dict) -> dict:
    """
    Process LITELLM.EXTRA_BODY configuration and update kwargs accordingly.

    The setting is expected to be a JSON object string. Only the keys
    ``processing_mode`` and ``service_tier`` are accepted, and none of them may
    already be present in ``kwargs``.

    Args:
        kwargs: The current kwargs dictionary to update (modified in place)

    Returns:
        Updated kwargs dictionary (unchanged when no extra_body is configured)

    Raises:
        ValueError: If extra_body contains invalid JSON, unsupported keys, or colliding keys
    """
    allowed_extra_body_keys = {"processing_mode", "service_tier"}
    extra_body = getattr(getattr(get_settings(), "litellm", None), "extra_body", None)
    if not extra_body:
        return kwargs

    try:
        litellm_extra_body = json.loads(extra_body)
    except json.JSONDecodeError as e:
        raise ValueError(f"LITELLM.EXTRA_BODY contains invalid JSON: {str(e)}") from e

    if not isinstance(litellm_extra_body, dict):
        raise ValueError("LITELLM.EXTRA_BODY must be a JSON object")

    # Sets are sorted before joining so the error text is stable between runs
    unsupported_keys = set(litellm_extra_body.keys()) - allowed_extra_body_keys
    if unsupported_keys:
        raise ValueError(f"LITELLM.EXTRA_BODY contains unsupported keys: {', '.join(sorted(unsupported_keys))}. Allowed keys: {', '.join(sorted(allowed_extra_body_keys))}")
    colliding_keys = kwargs.keys() & litellm_extra_body.keys()
    if colliding_keys:
        raise ValueError(f"LITELLM.EXTRA_BODY cannot override existing parameters: {', '.join(sorted(colliding_keys))}")

    kwargs.update(litellm_extra_body)
    return kwargs

================================================
FILE: pr_agent/algo/ai_handlers/openai_ai_handler.py
================================================
from os import environ
from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
import openai
from openai import AsyncOpenAI
from tenacity import retry, retry_if_exception_type, retry_if_not_exception_type, stop_after_attempt

from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.config_loader import get_settings
from pr_agent.log import get_logger

OPENAI_RETRIES = 5


class OpenAIHandler(BaseAiHandler):
    """AI handler that calls the OpenAI chat completions API through ``AsyncOpenAI``."""

    def __init__(self):
        """
        Configure the OpenAI client environment from settings.

        Raises:
            ValueError: If reading a required OpenAI setting raises AttributeError.
        """
        # Initialize OpenAIHandler specific attributes here
        try:
            super().__init__()
            environ["OPENAI_API_KEY"] = get_settings().openai.key
            if get_settings().get("OPENAI.ORG", None):
                openai.organization = get_settings().openai.org
            if get_settings().get("OPENAI.API_TYPE", None):
                if get_settings().openai.api_type == "azure":
                    self.azure = True
                    openai.azure_key = get_settings().openai.key
            if get_settings().get("OPENAI.API_VERSION", None):
                openai.api_version = get_settings().openai.api_version
            if get_settings().get("OPENAI.API_BASE", None):
                environ["OPENAI_BASE_URL"] = get_settings().openai.api_base

        except AttributeError as e:
            raise ValueError("OpenAI key is required") from e

    @property
    def deployment_id(self):
        """
        Returns the deployment ID for the OpenAI API.
        """
        return get_settings().get("OPENAI.DEPLOYMENT_ID", None)

    @retry(
        retry=retry_if_exception_type(openai.APIError) & retry_if_not_exception_type(openai.RateLimitError),
        stop=stop_after_attempt(OPENAI_RETRIES),
    )
    async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2, img_path: str = None):
        """
        Send a system/user prompt pair to the OpenAI chat completions API.

        Args:
            model (str): Model name passed to the API.
            system (str): System prompt.
            user (str): User prompt.
            temperature (float): Sampling temperature.
            img_path (str): Not supported by this handler; a warning is logged and it is ignored.

        Returns:
            tuple: ``(response_text, finish_reason)`` taken from the first choice.

        Raises:
            openai.RateLimitError: Re-raised unchanged (excluded from the retry predicate).
            openai.APIError: Re-raised unchanged; any other exception is re-raised through
                ``raise openai.APIError from e``.
        """
        try:
            if img_path:
                get_logger().warning(f"Image path is not supported for OpenAIHandler. Ignoring image path: {img_path}")
            # Embed the prompts in the message text: an extra positional argument without a
            # matching placeholder in the message is never rendered.
            get_logger().info(f"System: {system}")
            get_logger().info(f"User: {user}")
            messages = [{"role": "system", "content": system}, {"role": "user", "content": user}]
            client = AsyncOpenAI()
            chat_completion = await client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=temperature,
            )
            resp = chat_completion.choices[0].message.content
            finish_reason = chat_completion.choices[0].finish_reason
            usage = chat_completion.usage
            get_logger().info("AI response", response=resp, messages=messages, finish_reason=finish_reason,
                              model=model, usage=usage)
            return resp, finish_reason
        except openai.RateLimitError as e:
            get_logger().error(f"Rate limit error during LLM inference: {e}")
            raise
        except openai.APIError as e:
            get_logger().warning(f"Error during LLM inference: {e}")
            raise
        except Exception as e:
            get_logger().warning(f"Unknown error during LLM inference: {e}")
            raise openai.APIError from e


================================================
FILE: pr_agent/algo/cli_args.py
================================================
from base64 import b64decode, encode, b64encode
import hashlib

class CliArgs:
    """Validation of user-supplied command-line style arguments."""

    @staticmethod
    def validate_user_args(args: list) -> (bool, str):
        """
        Check that none of the ``--`` arguments touches a forbidden setting.

        Arguments are lower-cased and ``__`` is treated as ``.`` before matching, so
        ``--openai__key`` is handled like ``--openai.key``. Arguments that do not start
        with ``--`` are not checked.

        Args:
            args: List of argument strings (may be empty or None).

        Returns:
            tuple: ``(True, "")`` when all arguments are allowed, otherwise
            ``(False, <forbidden word that matched>)``. If an exception occurs the result
            is ``(False, <exception text>)``.
        """
        try:
            if not args:
                return True, ""

            # Forbidden names, base64-encoded and ':'-separated
            # b64encode('word'.encode()).decode()
            _encoded_args = 'c2hhcmVkX3NlY3JldA==:dXNlcg==:c3lzdGVt:ZW5hYmxlX2NvbW1lbnRfYXBwcm92YWw=:ZW5hYmxlX21hbnVhbF9hcHByb3ZhbA==:ZW5hYmxlX2F1dG9fYXBwcm92YWw=:YXBwcm92ZV9wcl9vbl9zZWxmX3Jldmlldw==:YmFzZV91cmw=:dXJs:YXBwX25hbWU=:c2VjcmV0X3Byb3ZpZGVy:Z2l0X3Byb3ZpZGVy:c2tpcF9rZXlz:b3BlbmFpLmtleQ==:QU5BTFlUSUNTX0ZPTERFUg==:dXJp:YXBwX2lk:d2ViaG9va19zZWNyZXQ=:YmVhcmVyX3Rva2Vu:UEVSU09OQUxfQUNDRVNTX1RPS0VO:b3ZlcnJpZGVfZGVwbG95bWVudF90eXBl:cHJpdmF0ZV9rZXk=:bG9jYWxfY2FjaGVfcGF0aA==:ZW5hYmxlX2xvY2FsX2NhY2hl:amlyYV9iYXNlX3VybA==:YXBpX2Jhc2U=:YXBpX3R5cGU=:YXBpX3ZlcnNpb24=:c2tpcF9rZXlz'

            blocked_words = []
            for token in _encoded_args.split(':'):
                word = b64decode(token).decode().lower()
                # Bare key names are anchored with a leading dot; dotted names are kept as is
                blocked_words.append(word if '.' in word else '.' + word)

            for arg in args:
                if not arg.startswith('--'):
                    continue
                # replace double underscore with dot, e.g. --openai__key -> --openai.key
                normalized = arg.lower().replace('__', '.')
                matched = next((word for word in blocked_words if word in normalized), None)
                if matched is not None:
                    return False, matched
            return True, ""
        except Exception as e:
            return False, str(e)


================================================
FILE: pr_agent/algo/file_filter.py
================================================
import fnmatch
import re

from pr_agent.config_loader import get_settings
from pr_agent.log import get_logger


def filter_ignored(files, platform = 'github'):
    """
    Filter out files that match the ignore patterns.

    Patterns come from three settings: ``ignore.regex`` (regular expressions),
    ``ignore.glob`` (globs, translated to regexes) and, for every entry of
    ``config.ignore_language_framework``, the globs listed under ``generated_code``.

    Args:
        files: List of file entries; the shape of an entry depends on ``platform``
            (object with ``filename`` for github, object with ``new``/``old`` for
            bitbucket, dict for bitbucket_server/gitlab/gitea, plain path for azure).
        platform: Name of the git platform the entries come from.

    Returns:
        The filtered list. If anything fails while filtering, the failure is logged and
        the list is returned in whatever state it had reached (best effort).
    """

    try:
        # load regex patterns, and translate glob patterns to regex.
        # Work on a copy: extending the list returned by the settings in place would
        # append the translated globs to the configuration itself on every call.
        patterns = get_settings().ignore.regex
        if isinstance(patterns, str):
            patterns = [patterns]
        else:
            patterns = list(patterns or [])
        glob_setting = get_settings().ignore.glob
        if isinstance(glob_setting, str):  # --ignore.glob=[.*utils.py], --ignore.glob=.*utils.py
            # tolerate spaces after commas and skip empty entries
            glob_setting = [g.strip() for g in glob_setting.strip('[]').split(",") if g.strip()]
        patterns += translate_globs_to_regexes(glob_setting or [])

        code_generators = get_settings().config.get('ignore_language_framework', [])
        if isinstance(code_generators, str):
            get_logger().warning("'ignore_language_framework' should be a list. Skipping language framework filtering.")
            code_generators = []
        for cg in code_generators:
            glob_patterns = get_settings().generated_code.get(cg, [])
            if isinstance(glob_patterns, str):
                glob_patterns = [glob_patterns]
            patterns += translate_globs_to_regexes(glob_patterns)

        # compile all valid patterns; invalid ones are skipped but reported
        compiled_patterns = []
        for r in patterns:
            try:
                compiled_patterns.append(re.compile(r))
            except re.error as e:
                get_logger().warning(f"Skipping invalid ignore pattern '{r}': {e}")

        # keep filenames that _don't_ match the ignore regex
        if files and isinstance(files, list):
            for r in compiled_patterns:
                if platform == 'github':
                    files = [f for f in files if (f.filename and not r.match(f.filename))]
                elif platform == 'bitbucket':
                    # files = [f for f in files if (f.new.path and not r.match(f.new.path))]
                    files_o = []
                    for f in files:
                        if hasattr(f, 'new'):
                            if f.new and f.new.path and not r.match(f.new.path):
                                files_o.append(f)
                                continue
                        if hasattr(f, 'old'):
                            if f.old and f.old.path and not r.match(f.old.path):
                                files_o.append(f)
                                continue
                    files = files_o
                elif platform == 'bitbucket_server':
                    files = [f for f in files if f.get('path', {}).get('toString') and not r.match(f['path']['toString'])]
                elif platform == 'gitlab':
                    # files = [f for f in files if (f['new_path'] and not r.match(f['new_path']))]
                    files_o = []
                    for f in files:
                        if 'new_path' in f and f['new_path'] and not r.match(f['new_path']):
                            files_o.append(f)
                            continue
                        if 'old_path' in f and f['old_path'] and not r.match(f['old_path']):
                            files_o.append(f)
                            continue
                    files = files_o
                elif platform == 'azure':
                    files = [f for f in files if not r.match(f)]
                elif platform == 'gitea':
                    files = [f for f in files if not r.match(f.get("filename", ""))]

    except Exception as e:
        # Best effort: report through the module logger and return what we have
        get_logger().warning(f"Could not filter file list: {e}")

    return files

def translate_globs_to_regexes(globs: list):
    """
    Translate glob patterns into regex strings via ``fnmatch.translate``.

    A glob that starts with ``**/`` yields two regexes: one for the glob itself and one
    for the glob without that prefix, so that root-level files are covered as well.

    Args:
        globs: Iterable of glob pattern strings.

    Returns:
        list: Regex strings, in the order of the input globs.
    """
    translated = []
    for glob in globs:
        variants = [glob]
        if glob.startswith("**/"):
            variants.append(glob[3:])  # cover root-level files
        translated.extend(fnmatch.translate(variant) for variant in variants)
    return translated


================================================
FILE: pr_agent/algo/git_patch_processing.py
================================================
from __future__ import annotations

import re
import traceback

from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo
from pr_agent.config_loader import get_settings
from pr_agent.log import get_logger


def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0,
                 patch_extra_lines_after=0, filename: str = "", new_file_str="") -> str:
    """
    Extend a patch with extra context lines taken from the original file.

    The patch is returned unchanged when there is nothing to do (empty patch, no extra
    lines requested, no original file content), when the file name is excluded by
    ``should_skip_patch``, or when extending fails (a warning is logged in that case).

    Args:
        original_file_str: Content of the original file (str, bytes or bytearray).
        patch_str: The patch to extend.
        patch_extra_lines_before: Number of extra context lines to add before each hunk.
        patch_extra_lines_after: Number of extra context lines to add after each hunk.
        filename: Name of the file the patch belongs to.
        new_file_str: Content of the new file (str, bytes or bytearray).

    Returns:
        str: The extended patch, or ``patch_str`` as given.
    """
    if not patch_str:
        return patch_str
    no_extra_lines_requested = patch_extra_lines_before == 0 and patch_extra_lines_after == 0
    if no_extra_lines_requested or not original_file_str:
        return patch_str

    original_file_str = decode_if_bytes(original_file_str)
    new_file_str = decode_if_bytes(new_file_str)
    # Decoding may have produced an empty result; nothing to extend from in that case
    if not original_file_str or should_skip_patch(filename):
        return patch_str

    try:
        return process_patch_lines(patch_str, original_file_str,
                                   patch_extra_lines_before, patch_extra_lines_after, new_file_str)
    except Exception as e:
        get_logger().warning(f"Failed to extend patch: {e}", artifact={"traceback": traceback.format_exc()})
        return patch_str


def decode_if_bytes(original_file_str):
    """
    Decode file content to ``str`` when it is given as bytes.

    Decoding order:
      1. UTF-8.
      2. UTF-16, only when the data starts with a UTF-16 byte-order mark.
      3. ISO-8859-1, which maps every byte to a character and therefore always succeeds.

    Args:
        original_file_str: File content as ``str``, ``bytes`` or ``bytearray`` (any other
            value, including None, is returned unchanged).

    Returns:
        The decoded string for bytes input, otherwise the input itself.
    """
    if not isinstance(original_file_str, (bytes, bytearray)):
        return original_file_str

    try:
        return original_file_str.decode('utf-8')
    except UnicodeDecodeError:
        pass

    # UTF-16 has to be tried before ISO-8859-1: the latter accepts any byte sequence, so
    # nothing placed after it would ever run. Requiring a BOM keeps non-UTF-16 data from
    # being misread.
    if original_file_str.startswith((b'\xff\xfe', b'\xfe\xff')):
        try:
            return original_file_str.decode('utf-16')
        except UnicodeDecodeError:
            pass

    return original_file_str.decode('iso-8859-1')


def should_skip_patch(filename):
    """
    Tell whether patches for ``filename`` should be left untouched.

    Returns True when ``filename`` ends with one of the suffixes listed in the
    ``config.patch_extension_skip_types`` setting; False when the setting or the
    file name is empty, or when no suffix matches.
    """
    skip_suffixes = get_settings().config.patch_extension_skip_types
    if not (skip_suffixes and filename):
        return False
    for suffix in skip_suffixes:
        if filename.endswith(suffix):
            return True
    return False


def process_patch_lines(patch_str, original_file_str, patch_extra_lines_before, patch_extra_lines_after, new_file_str=""):
    """
    Rebuild a patch so that each hunk carries extra context lines taken from the original file.

    The patch is walked line by line. Every line that matches the hunk-header regex triggers:
      1. appending the trailing context of the previous hunk (if that hunk was valid),
      2. parsing the new header and validating it against the original file,
      3. computing the extended start/size values and emitting a rewritten header followed by
         the leading context lines.
    All other lines are copied to the output unchanged.

    When `config.allow_dynamic_context` is set and the new file content is available, up to
    `config.max_extra_lines_before_dynamic_context` lines before the hunk are searched for the hunk's
    section header text; if it is found and the old/new lines from that point on are identical, the
    hunk is extended back to that line and the section header is dropped from the emitted header.

    Args:
        patch_str: The unified-diff patch.
        original_file_str: Content of the file before the change (already decoded text).
        patch_extra_lines_before: Context lines to add before each hunk.
        patch_extra_lines_after: Context lines to add after each hunk.
        new_file_str: Content of the file after the change; may be empty/None.

    Returns:
        The extended patch as a single string, or `patch_str` unchanged if an exception is raised
        while walking the patch (a warning is logged in that case).
    """
    allow_dynamic_context = get_settings().config.allow_dynamic_context
    patch_extra_lines_before_dynamic = get_settings().config.max_extra_lines_before_dynamic_context

    file_original_lines = original_file_str.splitlines()
    file_new_lines = new_file_str.splitlines() if new_file_str else []
    len_original_lines = len(file_original_lines)
    patch_lines = patch_str.splitlines()
    extended_patch_lines = []

    is_valid_hunk = True
    # -1 marks "no hunk header seen yet"
    start1, size1, start2, size2 = -1, -1, -1, -1
    RE_HUNK_HEADER = re.compile(
        r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
    try:
        for i,line in enumerate(patch_lines):
            if line.startswith('@@'):
                match = RE_HUNK_HEADER.match(line)
                # identify hunk header
                if match:
                    # finish processing previous hunk
                    if is_valid_hunk and (start1 != -1 and patch_extra_lines_after > 0):
                        # lines of the original file that directly follow the previous hunk
                        delta_lines_original = [f' {line}' for line in file_original_lines[start1 + size1 - 1:start1 + size1 - 1 + patch_extra_lines_after]]
                        extended_patch_lines.extend(delta_lines_original)

                    section_header, size1, size2, start1, start2 = extract_hunk_headers(match)

                    # a hunk whose first context line does not match the original file is emitted as-is
                    is_valid_hunk = check_if_hunk_lines_matches_to_file(i, file_original_lines, patch_lines, start1)

                    if is_valid_hunk and (patch_extra_lines_before > 0 or patch_extra_lines_after > 0):
                        def _calc_context_limits(patch_lines_before):
                            # Extended start/size for both sides of the hunk, never starting before line 1
                            extended_start1 = max(1, start1 - patch_lines_before)
                            extended_size1 = size1 + (start1 - extended_start1) + patch_extra_lines_after
                            extended_start2 = max(1, start2 - patch_lines_before)
                            extended_size2 = size2 + (start2 - extended_start2) + patch_extra_lines_after
                            if extended_start1 - 1 + extended_size1 > len_original_lines:
                                # we cannot extend beyond the original file
                                delta_cap = extended_start1 - 1 + extended_size1 - len_original_lines
                                extended_size1 = max(extended_size1 - delta_cap, size1)
                                extended_size2 = max(extended_size2 - delta_cap, size2)
                            return extended_start1, extended_size1, extended_start2, extended_size2

                        if allow_dynamic_context and file_new_lines:
                            extended_start1, extended_size1, extended_start2, extended_size2 = \
                                _calc_context_limits(patch_extra_lines_before_dynamic)

                            lines_before_original = file_original_lines[extended_start1 - 1:start1 - 1]
                            lines_before_new = file_new_lines[extended_start2 - 1:start2 - 1]
                            found_header = False
                            # NOTE(review): this loop rebinds the outer loop's `i` and `line`; that is harmless
                            # only because this branch always ends with `continue` below.
                            for i, line in enumerate(lines_before_original):
                                if section_header in line:
                                    # Update start and size in one line each
                                    extended_start1, extended_start2 = extended_start1 + i, extended_start2 + i
                                    extended_size1, extended_size2 = extended_size1 - i, extended_size2 - i
                                    lines_before_original_dynamic_context = lines_before_original[i:]
                                    lines_before_new_dynamic_context = lines_before_new[i:]
                                    if lines_before_original_dynamic_context == lines_before_new_dynamic_context:
                                        # get_logger().debug(f"found dynamic context match for section header: {section_header}")
                                        found_header = True
                                        section_header = ''
                                    else:
                                        pass  # its ok to be here. We cant apply dynamic context if the lines are different if 'old' and 'new' hunks
                                    # only the first line containing the section header is considered
                                    break

                            if not found_header:
                                # get_logger().debug(f"Section header not found in the extra lines before the hunk")
                                # fall back to the fixed number of lines before the hunk
                                extended_start1, extended_size1, extended_start2, extended_size2 = \
                                    _calc_context_limits(patch_extra_lines_before)
                        else:
                            extended_start1, extended_size1, extended_start2, extended_size2 = \
                                _calc_context_limits(patch_extra_lines_before)

                        # check if extra lines before hunk are different in original and new file
                        delta_lines_original = [f' {line}' for line in file_original_lines[extended_start1 - 1:start1 - 1]]
                        if file_new_lines:
                            delta_lines_new = [f' {line}' for line in file_new_lines[extended_start2 - 1:start2 - 1]]
                            if delta_lines_original != delta_lines_new:
                                # look for the longest tail of the pre-context that is identical on both sides
                                found_mini_match = False
                                for i in range(len(delta_lines_original)):
                                    if delta_lines_original[i:] == delta_lines_new[i:]:
                                        delta_lines_original = delta_lines_original[i:]
                                        delta_lines_new = delta_lines_new[i:]
                                        extended_start1 += i
                                        extended_size1 -= i
                                        extended_start2 += i
                                        extended_size2 -= i
                                        found_mini_match = True
                                        break
                                if not found_mini_match:
                                    # no usable pre-context: emit the original header values
                                    # NOTE(review): the sizes here do not include patch_extra_lines_after, although
                                    # trailing context is still appended for this hunk later on - confirm intended.
                                    extended_start1 = start1
                                    extended_size1 = size1
                                    extended_start2 = start2
                                    extended_size2 = size2
                                    delta_lines_original = []
                                    # get_logger().debug(f"Extra lines before hunk are different in original and new file",
                                    #                    artifact={"delta_lines_original": delta_lines_original,
                                    #                              "delta_lines_new": delta_lines_new})

                        #  logic to remove section header if its in the extra delta lines (in dynamic context, this is also done)
                        if section_header and not allow_dynamic_context:
                            for line in delta_lines_original:
                                if section_header in line:
                                    section_header = ''  # remove section header if it is in the extra delta lines
                                    break
                    else:
                        # invalid hunk, or no extension requested: keep the header values untouched
                        extended_start1 = start1
                        extended_size1 = size1
                        extended_start2 = start2
                        extended_size2 = size2
                        delta_lines_original = []
                    # blank separator line, rewritten header, then the leading context lines
                    extended_patch_lines.append('')
                    extended_patch_lines.append(
                        f'@@ -{extended_start1},{extended_size1} '
                        f'+{extended_start2},{extended_size2} @@ {section_header}')
                    extended_patch_lines.extend(delta_lines_original)  # one to zero based
                    continue
            # regular patch line, or a '@@' line that is not a valid hunk header: copy unchanged
            extended_patch_lines.append(line)
    except Exception as e:
        get_logger().warning(f"Failed to extend patch: {e}", artifact={"traceback": traceback.format_exc()})
        return patch_str

    # finish processing last hunk
    if start1 != -1 and patch_extra_lines_after > 0 and is_valid_hunk:
        delta_lines_original = file_original_lines[start1 + size1 - 1:start1 + size1 - 1 + patch_extra_lines_after]
        # add space at the beginning of each extra line
        delta_lines_original = [f' {line}' for line in delta_lines_original]
        extended_patch_lines.extend(delta_lines_original)

    extended_patch_str = '\n'.join(extended_patch_lines)
    return extended_patch_str

def check_if_hunk_lines_matches_to_file(i, original_lines, patch_lines, start1):
    """
    Check if the hunk lines match the original file content. We saw cases where the hunk header line doesn't match the original file content, and then
    extending the hunk with extra lines before the hunk header can cause the hunk to be invalid.

    Only the line directly after the hunk header is inspected, and only when it is a context line
    (starts with a space). It is compared, ignoring surrounding whitespace, with line `start1` of the
    original file.

    Args:
        i: Index of the hunk header inside `patch_lines`.
        original_lines: Lines of the original file.
        patch_lines: Lines of the patch.
        start1: 1-based start line of the hunk in the original file, taken from the hunk header.

    Returns:
        bool: False when the first context line differs from the original file (also when the
        difference looks like an encoding mismatch); True otherwise, including when the check cannot
        be performed (no following line, non-context line, or line index outside the file).
    """
    is_valid_hunk = True
    try:
        if i + 1 < len(patch_lines) and patch_lines[i + 1].startswith(' '):  # an existing line in the file
            patch_line = patch_lines[i + 1].strip()
            original_line = original_lines[start1 - 1].strip()
            if patch_line != original_line:
                # check if different encoding is needed
                for encoding in ['iso-8859-1', 'latin-1', 'ascii', 'utf-16']:
                    try:
                        if original_line.encode(encoding).decode().strip() == patch_line:
                            get_logger().info(f"Detected different encoding in hunk header line {start1}, needed encoding: {encoding}")
                            return False # we still want to avoid extending the hunk. But we don't want to log an error
                    except Exception:
                        # this encoding cannot represent / round-trip the line - try the next one
                        pass

                is_valid_hunk = False
                get_logger().info(
                    f"Invalid hunk in PR, line {start1} in hunk header doesn't match the original file content")
    except Exception:
        # Best effort: a failed check (e.g. header pointing outside the file) must not break patch
        # processing. Only `Exception` is caught so KeyboardInterrupt/SystemExit still propagate.
        pass
    return is_valid_hunk


def extract_hunk_headers(match):
    """
    Unpack the fields of a matched unified-diff hunk header ('@@ -start1,size1 +start2,size2 @@ section').

    Args:
        match: Match object of the hunk-header regex. Its first five groups must be, in order:
            old start, old size (optional), new start, new size (optional), section header text.

    Returns:
        tuple: (section_header, size1, size2, start1, start2) with the four numbers as ints.
    """
    start1, size1, start2, size2, section_header = match.groups()[:5]
    # In the unified format an omitted ",count" means the range covers exactly one line
    # (e.g. '@@ -0,0 +1 @@' adds one line), so the default is 1, not 0.
    size1 = 1 if size1 is None else int(size1)
    size2 = 1 if size2 is None else int(size2)
    return section_header, size1, size2, int(start1 or 0), int(start2 or 0)


def omit_deletion_hunks(patch_lines) -> str:
    """
    Omit deletion hunks from the patch and return the modified patch.

    A hunk is kept only if at least one of its lines starts with '+'. Lines that start with '@@' but
    are not a valid hunk header are dropped. Lines that appear before the first hunk header are kept
    together with the first hunk.

    Args:
    - patch_lines: a list of strings representing the lines of the patch
    Returns:
    - A string representing the modified patch with deletion hunks omitted
    """

    temp_hunk = []
    added_patched = []
    add_hunk = False
    inside_hunk = False
    RE_HUNK_HEADER = re.compile(
        r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))?\ @@[ ]?(.*)")

    for line in patch_lines:
        if line.startswith('@@'):
            match = RE_HUNK_HEADER.match(line)
            if match:
                # finish previous hunk
                if inside_hunk:
                    if add_hunk:
                        added_patched.extend(temp_hunk)
                    # Always start the next hunk from a clean buffer. Resetting only after a kept hunk
                    # would let a deletion-only hunk leak into the output together with the next hunk.
                    temp_hunk = []
                    add_hunk = False
                temp_hunk.append(line)
                inside_hunk = True
        else:
            temp_hunk.append(line)
            if line.startswith('+'):
                add_hunk = True
    if inside_hunk and add_hunk:
        added_patched.extend(temp_hunk)

    return '\n'.join(added_patched)


def handle_patch_deletions(patch: str, original_file_content_str: str,
                           new_file_content_str: str, file_name: str, edit_type: EDIT_TYPE = EDIT_TYPE.UNKNOWN) -> str:
    """
    Reduce a patch by dropping what only describes deletions.

    If the file has no new content and its edit type is DELETED or UNKNOWN, the file is treated as
    deleted and no patch is returned at all. Otherwise the hunks that contain no added lines are
    removed from the patch.

    Args:
        patch (str): The patch to be handled.
        original_file_content_str (str): The original content of the file (not used by this function).
        new_file_content_str (str): The new content of the file.
        file_name (str): The name of the file, used for logging.
        edit_type (EDIT_TYPE): How the file was changed.

    Returns:
        str: The patch without deletion-only hunks, or None when the file is treated as deleted.

    """
    treated_as_deleted = not new_file_content_str and edit_type in (EDIT_TYPE.DELETED, EDIT_TYPE.UNKNOWN)
    if treated_as_deleted:
        # deleted files carry no patch - only the fact that they were deleted is reported
        if get_settings().config.verbosity_level > 0:
            get_logger().info(f"Processing file: {file_name}, minimizing deletion file")
        return None

    reduced_patch = omit_deletion_hunks(patch.splitlines())
    if reduced_patch == patch:
        return patch

    if get_settings().config.verbosity_level > 0:
        get_logger().info(f"Processing file: {file_name}, hunks were deleted")
    return reduced_patch


def decouple_and_convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
    """
    Convert a given patch string into a string with line numbers for each hunk, indicating the new and old content of
    the file.

    Every hunk is rendered as its '@@' header, then a '__new hunk__' section holding the context and
    added lines (each prefixed with its line number in the new file) and, if the hunk removes lines,
    an '__old hunk__' section holding the context and removed lines without numbers.

    Lines that start with '@@' but are not a valid hunk header are ignored, so the hunk being
    collected keeps its header and numbering instead of the conversion failing.

    Args:
        patch (str): The patch string to be converted.
        file: An object containing the filename of the file being patched. When falsy, no file header
            is emitted.

    Returns:
        str: A string with line numbers for each hunk, indicating the new and old content of the file.
        For a file whose `edit_type` is DELETED, only a short "was deleted" message is returned.

    example output:
## src/file.ts
__new hunk__
881        line1
882        line2
883        line3
887 +      line4
888 +      line5
889        line6
890        line7
...
__old hunk__
        line1
        line2
-       line3
-       line4
        line5
        line6
           ...
    """

    # Add a header for the file
    if file:
        # if the file was deleted, return a message indicating that the file was deleted
        if hasattr(file, 'edit_type') and file.edit_type == EDIT_TYPE.DELETED:
            return f"\n\n## File '{file.filename.strip()}' was deleted\n"

        patch_with_lines_str = f"\n\n## File: '{file.filename.strip()}'\n"
    else:
        patch_with_lines_str = ""

    def _append_hunk(text, hunk_header, new_lines, old_lines, first_new_line_number):
        # Render one collected hunk and return the accumulated text with it appended.
        if hunk_header:
            text += f'\n{hunk_header}\n'
        has_added_lines = any(new_line.startswith('+') for new_line in new_lines)
        has_removed_lines = any(old_line.startswith('-') for old_line in old_lines)
        # the new hunk is shown whenever the hunk changes anything, even if it only removes lines
        if has_added_lines or has_removed_lines:
            text = text.rstrip() + '\n__new hunk__\n'
            for offset, new_line in enumerate(new_lines):
                text += f"{first_new_line_number + offset} {new_line}\n"
        if has_removed_lines:
            text = text.rstrip() + '\n__old hunk__\n'
            for old_line in old_lines:
                text += f"{old_line}\n"
        return text

    patch_lines = patch.splitlines()
    RE_HUNK_HEADER = re.compile(
        r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
    new_content_lines = []
    old_content_lines = []
    match = None  # last valid hunk header match
    start2 = -1  # first line number of the current hunk in the new file
    header_line = ''  # text of the last valid hunk header
    for line_i, line in enumerate(patch_lines):
        if 'no newline at end of file' in line.lower():
            continue

        if line.startswith('@@'):
            current_match = RE_HUNK_HEADER.match(line)
            if not current_match:
                # malformed header: skip it rather than failing on a missing match
                continue
            if new_content_lines or old_content_lines:  # found a new hunk, split the previous lines
                patch_with_lines_str = _append_hunk(patch_with_lines_str, header_line,
                                                    new_content_lines, old_content_lines, start2)
                new_content_lines = []
                old_content_lines = []
            match = current_match
            header_line = line
            # only the start line of the new file is needed here (third group of the header)
            start2 = int(match.group(3))

        elif line.startswith('+'):
            new_content_lines.append(line)
        elif line.startswith('-'):
            old_content_lines.append(line)
        else:
            if not line and line_i: # if this line is empty and the next line is a hunk header, skip it
                if line_i + 1 < len(patch_lines) and patch_lines[line_i + 1].startswith('@@'):
                    continue
                elif line_i + 1 == len(patch_lines):
                    continue
            # context line: part of both the new and the old content
            new_content_lines.append(line)
            old_content_lines.append(line)

    # finishing last hunk (emitted only when it has at least one context or added line)
    if match and new_content_lines:
        patch_with_lines_str = _append_hunk(patch_with_lines_str, header_line,
                                            new_content_lines, old_content_lines, start2)

    return patch_with_lines_str.rstrip()


def extract_hunk_lines_from_patch(patch: str, file_name, line_start, line_end, side, remove_trailing_chars: bool = True) -> tuple[str, str]:
    """
    Pick the hunk(s) of a patch that contain a requested line range.

    Args:
        patch: The unified-diff patch of the file.
        file_name: Name of the file, used for the header of the returned text.
        line_start: First requested line number.
        line_end: Last requested line number.
        side: 'left' (line numbers of the old file) or 'right' (line numbers of the new file);
            compared case-insensitively.
        remove_trailing_chars: When True, trailing whitespace is stripped from both results.

    Returns:
        tuple[str, str]: (text of the file header plus the hunks whose range contains `line_start`,
        the lines selected inside those hunks). Two empty strings are returned if any error occurs;
        the error is logged.
    """
    hunk_header_re = re.compile(
        r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
    try:
        rendered = f"\n\n## File: '{file_name.strip()}'\n\n"
        picked = ""
        old_start = new_start = -1
        hunk_excluded = False
        counted = 0  # lines seen in the current hunk, not counting removed ('-') lines
        for raw_line in patch.splitlines():
            if 'no newline at end of file' in raw_line.lower():
                continue

            if raw_line.startswith('@@'):
                hunk_excluded = False
                counted = 0
                _, old_size, new_size, old_start, new_start = extract_hunk_headers(hunk_header_re.match(raw_line))

                # keep the hunk only if the requested start line falls inside its range
                wanted_side = side.lower()
                if wanted_side == 'left':
                    hunk_excluded = not (old_start <= line_start <= old_start + old_size)
                elif wanted_side == 'right':
                    hunk_excluded = not (new_start <= line_start <= new_start + new_size)
                if not hunk_excluded:
                    rendered += f'\n{raw_line}\n'
                continue

            if hunk_excluded:
                continue

            wanted_side = side.lower()
            if wanted_side == 'right':
                in_range = line_start <= new_start + counted <= line_end
            elif wanted_side == 'left':
                # NOTE(review): the lower bound compares against old_start, not line_start, so it is
                # always satisfied - left-side selection starts at the top of the hunk. Confirm intended.
                in_range = old_start <= counted + old_start <= line_end
            else:
                in_range = False
            if in_range:
                picked += raw_line + '\n'
            rendered += raw_line + '\n'
            if not raw_line.startswith('-'):  # currently we don't support /ask line for deleted lines
                counted += 1
    except Exception as e:
        get_logger().error(f"Failed to extract hunk lines from patch: {e}", artifact={"traceback": traceback.format_exc()})
        return "", ""

    if not remove_trailing_chars:
        return rendered, picked
    return rendered.rstrip(), picked.rstrip()


================================================
FILE: pr_agent/algo/language_handler.py
================================================
# Language Selection, source: https://github.com/bigcode-project/bigcode-dataset/blob/main/language_selection/programming-languages-to-file-extensions.json  # noqa E501
from typing import Dict

from pr_agent.config_loader import get_settings


def filter_bad_extensions(files):
    """
    Return the files whose name is set and passes `is_valid_file`.

    The list of rejected extensions is `bad_extensions.default` from the settings, plus
    `bad_extensions.extra` when `config.use_extra_bad_extensions` is enabled.

    Args:
        files: Iterable of objects exposing a `filename` attribute.

    Returns:
        list: The accepted file objects, in their original order.
    """
    # Bad Extensions, source: https://github.com/EleutherAI/github-downloader/blob/345e7c4cbb9e0dc8a0615fd995a08bf9d73b3fe6/download_repo_text.py  # noqa: E501
    settings = get_settings()
    # Work on a copy: `+=` applied directly to the list held by the settings object extends that
    # list in place, so every call would append the extra extensions to the configured defaults again.
    bad_extensions = list(settings.bad_extensions.default or [])
    if settings.config.use_extra_bad_extensions:
        bad_extensions += list(settings.bad_extensions.extra or [])
    return [f for f in files if f.filename is not None and is_valid_file(f.filename, bad_extensions)]


def is_valid_file(filename:str, bad_extensions=None) -> bool:
    """
    Decide whether a file should be processed, based on its name.

    Args:
        filename: Name (or path) of the file.
        bad_extensions: Extensions (without the leading dot) to reject. When empty or None, the list
            is read from the settings: `bad_extensions.default`, plus `bad_extensions.extra` when
            `config.use_extra_bad_extensions` is enabled.

    Returns:
        bool: False for an empty filename, for known auto-generated lock files, and for files whose
        last dot-separated part is listed in `bad_extensions`; True otherwise.
    """
    if not filename:
        return False
    if not bad_extensions:
        settings = get_settings()
        # Work on a copy: `+=` applied directly to the list held by the settings object extends that
        # list in place, so every call would append the extra extensions to the configured defaults again.
        bad_extensions = list(settings.bad_extensions.default or [])
        if settings.config.use_extra_bad_extensions:
            bad_extensions += list(settings.bad_extensions.extra or [])

    auto_generated_files = ['package-lock.json', 'yarn.lock', 'composer.lock', 'Gemfile.lock', 'poetry.lock']
    if any(filename.endswith(forbidden_file) for forbidden_file in auto_generated_files):
        return False

    return filename.split('.')[-1] not in bad_extensions


def sort_files_by_main_languages(languages: Dict, files: list):
    """
    Group files by the repository's languages, most used language first.

    Files with a rejected extension are filtered out. Each language that has at least one matching
    file contributes one {"language", "files"} entry; files whose extension belongs to none of the
    languages are collected in a final "Other" entry. When `languages` is empty, all remaining files
    are returned in a single "Other" entry.
    """
    # language names, ordered from the largest size to the smallest
    ranked_languages = [name for name, _ in sorted(languages.items(), key=lambda pair: pair[1], reverse=True)]

    # extensions of every ranked language (case-insensitive lookup, empty list when unknown)
    extension_lookup = {name.lower(): exts for name, exts in get_settings().language_extension_map_org.items()}
    extensions_per_language = [extension_lookup.get(name.lower(), []) for name in ranked_languages]

    # drop files with bad extensions
    valid_files = filter_bad_extensions(files)

    # if no languages detected, put all files in the "Other" category
    if not languages:
        return [{"language": "Other", "files": list(valid_files)}]

    known_extensions = [ext for exts in extensions_per_language for ext in exts]

    grouped = []
    leftovers = {}
    for exts, language_name in zip(extensions_per_language, ranked_languages):  # noqa: B905
        matched = []
        for candidate in valid_files:
            suffix = f".{candidate.filename.split('.')[-1]}"
            if suffix in exts:
                matched.append(candidate)
            elif candidate.filename not in leftovers and suffix not in known_extensions:
                # belongs to none of the detected languages
                leftovers[candidate.filename] = candidate
        if matched:
            grouped.append({"language": language_name, "files": matched})
    grouped.append({"language": "Other", "files": list(leftovers.values())})
    return grouped


================================================
FILE: pr_agent/algo/pr_processing.py
================================================
from __future__ import annotations

import traceback
from typing import Callable, List, Tuple

from github import RateLimitExceededException

from pr_agent.algo.file_filter import filter_ignored
from pr_agent.algo.git_patch_processing import (
    extend_patch, handle_patch_deletions,
    decouple_and_convert_to_hunks_with_lines_numbers)
from pr_agent.algo.language_handler import sort_files_by_main_languages
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo
from pr_agent.algo.utils import ModelType, clip_tokens, get_max_tokens, get_model
from pr_agent.config_loader import get_settings
from pr_agent.git_providers.git_provider import GitProvider
from pr_agent.log import get_logger

# Titles of the file-name lists that get_pr_diff appends to a pruned diff
DELETED_FILES_ = "Deleted files:\n"

MORE_MODIFIED_FILES_ = "Additional modified files (insufficient token budget to process):\n"

ADDED_FILES_ = "Additional added files (insufficient token budget to process):\n"

# Margin added to the token count when get_pr_diff decides whether the full diff fits the model limit
OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD = 1500
# Margin subtracted from the model limit when get_pr_diff budgets the appended file-name lists
OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 1000
# Upper bound applied by cap_and_log_extra_lines to the configured extra context lines
MAX_EXTRA_LINES = 10


def cap_and_log_extra_lines(value, direction) -> int:
    """
    Limit a configured number of extra patch lines to MAX_EXTRA_LINES.

    Args:
        value: The configured number of extra lines.
        direction: Suffix of the setting name ("before" / "after"), used only in the warning text.

    Returns:
        int: `value`, or MAX_EXTRA_LINES when `value` is larger (a warning is logged in that case).
    """
    exceeds_cap = value > MAX_EXTRA_LINES
    if exceeds_cap:
        get_logger().warning(f"patch_extra_lines_{direction} was {value}, capping to {MAX_EXTRA_LINES}")
    return MAX_EXTRA_LINES if exceeds_cap else value


def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler,
                model: str,
                add_line_numbers_to_hunks: bool = False,
                disable_extra_lines: bool = False,
                large_pr_handling=False,
                return_remaining_files=False):
    """
    Build the diff text of a PR so that it fits the token limit of `model`.

    First the extended diff (patches with extra context lines) is generated. If its token count plus
    OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD is below the model limit, it is returned as is. Otherwise a
    compressed diff is generated, its first patch is used, and the names of the files that are not
    part of that patch are appended as "added" / "modified" / "deleted" lists, each clipped to the
    tokens that are still available.

    Args:
        git_provider: Provider used to fetch the diff files and the repository languages.
        token_handler: Used for the prompt token count and for counting tokens of text.
        model: Model name, used to look up the maximal number of tokens.
        add_line_numbers_to_hunks: Passed on to the diff generators.
        disable_extra_lines: When True, no extra context lines are requested around hunks.
        large_pr_handling: When True and the compressed diff consists of more than one patch,
            an empty string is returned.
        return_remaining_files: When True, the pruned result is returned together with the
            remaining-files list.

    Returns:
        The diff string, or the tuple (diff string, remaining_files_list) when `return_remaining_files`
        is set and the pruning path is taken.
        NOTE(review): the two early returns (full diff / large PR) always return a plain string, also
        when `return_remaining_files` is True.

    Raises:
        RateLimitExceededException: re-raised after logging when fetching the diff files hits the rate limit.
    """
    if disable_extra_lines:
        PATCH_EXTRA_LINES_BEFORE = 0
        PATCH_EXTRA_LINES_AFTER = 0
    else:
        PATCH_EXTRA_LINES_BEFORE = get_settings().config.patch_extra_lines_before
        PATCH_EXTRA_LINES_AFTER = get_settings().config.patch_extra_lines_after
        PATCH_EXTRA_LINES_BEFORE = cap_and_log_extra_lines(PATCH_EXTRA_LINES_BEFORE, "before")
        PATCH_EXTRA_LINES_AFTER = cap_and_log_extra_lines(PATCH_EXTRA_LINES_AFTER, "after")

    try:
        diff_files = git_provider.get_diff_files()
    except RateLimitExceededException as e:
        get_logger().error(f"Rate limit exceeded for git provider API. original message {e}")
        raise

    # get pr languages
    pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files)
    if pr_languages:
        try:
            get_logger().info(f"PR main language: {pr_languages[0]['language']}")
        except Exception as e:
            pass  # logging the main language is informational only

    # generate a standard diff string, with patch extension
    patches_extended, total_tokens, patches_extended_tokens = pr_generate_extended_diff(
        pr_languages, token_handler, add_line_numbers_to_hunks,
        patch_extra_lines_before=PATCH_EXTRA_LINES_BEFORE, patch_extra_lines_after=PATCH_EXTRA_LINES_AFTER)

    # if we are under the limit, return the full diff
    if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < get_max_tokens(model):
        get_logger().info(f"Tokens: {total_tokens}, total tokens under limit: {get_max_tokens(model)}, "
                          f"returning full diff.")
        return "\n".join(patches_extended)

    # if we are over the limit, start pruning (If we got here, we will not extend the patches with extra lines)
    get_logger().info(f"Tokens: {total_tokens}, total tokens over limit: {get_max_tokens(model)}, "
                      f"pruning diff.")
    patches_compressed_list, total_tokens_list, deleted_files_list, remaining_files_list, file_dict, files_in_patches_list = \
        pr_generate_compressed_diff(pr_languages, token_handler, model, add_line_numbers_to_hunks, large_pr_handling)

    if large_pr_handling and len(patches_compressed_list) > 1:
        get_logger().info(f"Large PR handling mode, and found {len(patches_compressed_list)} patches with original diff.")
        return "" # return empty string, as we want to generate multiple patches with a different prompt

    # return the first patch
    patches_compressed = patches_compressed_list[0]
    total_tokens_new = total_tokens_list[0]
    files_in_patch = files_in_patches_list[0]

    # Insert additional information about added, modified, and deleted files if there is enough space
    max_tokens = get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD
    curr_token = total_tokens_new  # == token_handler.count_tokens(final_diff)+token_handler.prompt_tokens
    final_diff = "\n".join(patches_compressed)
    delta_tokens = 10  # minimal free budget required before any file list is built
    added_list_str = modified_list_str = deleted_list_str = ""
    # NOTE(review): unprocessed_files is filled below but not read anywhere in this function
    unprocessed_files = []
    # generate the added, modified, and deleted files lists
    if (max_tokens - curr_token) > delta_tokens:
        for filename, file_values in file_dict.items():
            if filename in files_in_patch:
                continue  # already part of the returned patch
            if file_values['edit_type'] == EDIT_TYPE.ADDED:
                unprocessed_files.append(filename)
                if not added_list_str:
                    added_list_str = ADDED_FILES_ + f"\n{filename}"
                else:
                    added_list_str = added_list_str + f"\n{filename}"
            elif file_values['edit_type'] in [EDIT_TYPE.MODIFIED, EDIT_TYPE.RENAMED]:
                unprocessed_files.append(filename)
                if not modified_list_str:
                    modified_list_str = MORE_MODIFIED_FILES_ + f"\n{filename}"
                else:
                    modified_list_str = modified_list_str + f"\n{filename}"
            elif file_values['edit_type'] == EDIT_TYPE.DELETED:
                # unprocessed_files.append(filename) # not needed here, because the file was deleted, so no need to process it
                if not deleted_list_str:
                    deleted_list_str = DELETED_FILES_ + f"\n{filename}"
                else:
                    deleted_list_str = deleted_list_str + f"\n{filename}"

    # prune the added, modified, and deleted files lists, and add them to the final diff
    # (each list is clipped to the budget left after the previous one; "+ 2" accounts for the "\n\n" separator)
    added_list_str = clip_tokens(added_list_str, max_tokens - curr_token)
    if added_list_str:
        final_diff = final_diff + "\n\n" + added_list_str
        curr_token += token_handler.count_tokens(added_list_str) + 2
    modified_list_str = clip_tokens(modified_list_str, max_tokens - curr_token)
    if modified_list_str:
        final_diff = final_diff + "\n\n" + modified_list_str
        curr_token += token_handler.count_tokens(modified_list_str) + 2
    deleted_list_str = clip_tokens(deleted_list_str, max_tokens - curr_token)
    if deleted_list_str:
        final_diff = final_diff + "\n\n" + deleted_list_str

    get_logger().debug(f"After pruning, added_list_str: {added_list_str}, modified_list_str: {modified_list_str}, "
                       f"deleted_list_str: {deleted_list_str}")
    if not return_remaining_files:
        return final_diff
    else:
        return final_diff, remaining_files_list


def get_pr_diff_multiple_patchs(git_provider: GitProvider, token_handler: TokenHandler, model: str,
                add_line_numbers_to_hunks: bool = False, disable_extra_lines: bool = False):
    """
    Generate the compressed diff of a PR in large-PR mode and return all of its parts.

    Args:
        git_provider: Provider used to fetch the diff files and the repository languages.
        token_handler: Passed on to the compressed-diff generator.
        model: Model name, passed on to the compressed-diff generator.
        add_line_numbers_to_hunks: Passed on to the compressed-diff generator.
        disable_extra_lines: Accepted but not used by this function.

    Returns:
        tuple: (patches_compressed_list, total_tokens_list, deleted_files_list, remaining_files_list,
        file_dict, files_in_patches_list) as produced by `pr_generate_compressed_diff`.

    Raises:
        RateLimitExceededException: re-raised after logging when fetching the diff files hits the rate limit.
    """
    try:
        diff_files = git_provider.get_diff_files()
    except RateLimitExceededException as e:
        get_logger().error(f"Rate limit exceeded for git provider API. original message {e}")
        raise

    # group the files by the repository languages
    pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files)
    if pr_languages:
        try:
            get_logger().info(f"PR main language: {pr_languages[0]['language']}")
        except Exception:
            pass

    (patches, token_counts, deleted_files,
     remaining_files, files_by_name, files_per_patch) = pr_generate_compressed_diff(
        pr_languages, token_handler, model, add_line_numbers_to_hunks, large_pr_handling=True)

    return patches, token_counts, deleted_files, remaining_files, files_by_name, files_per_patch


def pr_generate_extended_diff(pr_languages: list,
                              token_handler: TokenHandler,
                              add_line_numbers_to_hunks: bool,
                              patch_extra_lines_before: int = 0,
                              patch_extra_lines_after: int = 0) -> Tuple[list, int, list]:
    """
    Produce one context-extended patch per changed file and count its tokens.

    Args:
        pr_languages: Language groups; each entry exposes its files under the 'files' key.
        token_handler: Supplies the initial prompt token count and `count_tokens`.
        add_line_numbers_to_hunks: When True, hunks are converted to the line-numbered format;
            otherwise a "## File: ..." header is prepended to the raw extended patch.
        patch_extra_lines_before: Context lines to add before each hunk.
        patch_extra_lines_after: Context lines to add after each hunk.

    Returns:
        (patches_extended, total_tokens, patches_extended_tokens) where total_tokens starts from
        `token_handler.prompt_tokens` and the two lists are index-aligned.

    Side effects:
        Stores the computed token count on each processed file (`file.tokens`).
    """
    running_total = token_handler.prompt_tokens  # prompt tokens are counted up front
    extended_patches = []
    tokens_per_patch = []

    for language_group in pr_languages:
        for file in language_group['files']:
            base_content = file.base_file
            head_content = file.head_file
            raw_patch = file.patch
            if not raw_patch:
                continue

            # widen every hunk with surrounding context lines
            extended_patch = extend_patch(base_content, raw_patch,
                                          patch_extra_lines_before, patch_extra_lines_after, file.filename,
                                          new_file_str=head_content)
            if not extended_patch:
                get_logger().warning(f"Failed to extend patch for file: {file.filename}")
                continue

            if not add_line_numbers_to_hunks:
                # blank line before each hunk header, then a file title on top
                extended_patch = extended_patch.replace('\n@@ ', '\n\n@@ ')
                full_extended_patch = f"\n\n## File: '{file.filename.strip()}'\n\n{extended_patch.strip()}\n"
            else:
                full_extended_patch = decouple_and_convert_to_hunks_with_lines_numbers(extended_patch, file)

            # optionally prepend the AI-generated summary of this file
            if file.ai_file_summary and get_settings().get("config.enable_ai_metadata", False):
                full_extended_patch = add_ai_summary_top_patch(file, full_extended_patch)

            token_count = token_handler.count_tokens(full_extended_patch)
            file.tokens = token_count
            running_total += token_count
            tokens_per_patch.append(token_count)
            extended_patches.append(full_extended_patch)

    return extended_patches, running_total, tokens_per_patch


def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, model: str,
                                convert_hunks_to_line_numbers: bool,
                                large_pr_handling: bool) -> Tuple[list, list, list, list, dict, list]:
    """
    Build token-bounded groups of patches from the PR files.

    Files are ordered per language group by descending `tokens`, delete-only hunks are removed,
    and the resulting patches are packed by `generate_full_patch` into one group, or into
    several groups when `large_pr_handling` is set.

    Returns:
        (patches_list, total_tokens_list, deleted_files_list, remaining_files_list,
         file_dict, files_in_patches_list). The three "*_list" group outputs are index-aligned;
        `remaining_files_list` holds filenames that were not placed in any group.
    """
    # largest files first inside each language group
    ordered_files = []
    for language_group in top_langs:
        ordered_files.extend(sorted(language_group['files'], key=lambda item: item.tokens, reverse=True))

    # prepare a patch and a token count for every file that still has content
    deleted_files_list = []
    file_dict = {}
    for file in ordered_files:
        base_content = file.base_file
        head_content = file.head_file
        patch = file.patch
        if not patch:
            continue

        # drop delete-only hunks; None means nothing is left to show for this file
        patch = handle_patch_deletions(patch, base_content,
                                       head_content, file.filename, file.edit_type)
        if patch is None:
            if file.filename not in deleted_files_list:
                deleted_files_list.append(file.filename)
            continue

        if convert_hunks_to_line_numbers:
            patch = decouple_and_convert_to_hunks_with_lines_numbers(patch, file)

        ## AI-summary metadata is intentionally not added in the compressed diff
        # if file.ai_file_summary and get_settings().config.get('config.is_auto_command', False):
        #     patch = add_ai_summary_top_patch(file, patch)

        file_dict[file.filename] = {
            'patch': patch,
            'tokens': token_handler.count_tokens(patch),
            'edit_type': file.edit_type,
        }

    max_tokens_model = get_max_tokens(model)

    # first packing pass over all files
    remaining_files_list = [file.filename for file in ordered_files]
    patches_list = []
    total_tokens_list = []
    files_in_patches_list = []
    total_tokens, patches, remaining_files_list, files_in_patch_list = generate_full_patch(
        convert_hunks_to_line_numbers, file_dict, max_tokens_model, remaining_files_list, token_handler)
    patches_list.append(patches)
    total_tokens_list.append(total_tokens)
    files_in_patches_list.append(files_in_patch_list)

    # further passes over whatever did not fit, bounded by the AI-call budget
    if large_pr_handling:
        NUMBER_OF_ALLOWED_ITERATIONS = get_settings().pr_description.max_ai_calls - 1 # one more call is to summarize
        for _ in range(NUMBER_OF_ALLOWED_ITERATIONS - 1):
            if not remaining_files_list:
                break
            total_tokens, patches, remaining_files_list, files_in_patch_list = generate_full_patch(
                convert_hunks_to_line_numbers, file_dict, max_tokens_model, remaining_files_list, token_handler)
            if patches:
                patches_list.append(patches)
                total_tokens_list.append(total_tokens)
                files_in_patches_list.append(files_in_patch_list)

    return patches_list, total_tokens_list, deleted_files_list, remaining_files_list, file_dict, files_in_patches_list


def generate_full_patch(convert_hunks_to_line_numbers, file_dict, max_tokens_model,remaining_files_list_prev, token_handler):
    """
    Pack as many pending patches as fit into one token-bounded group.

    Args:
        convert_hunks_to_line_numbers: When falsy, each patch gets a "## File: ..." header.
        file_dict: Maps filename -> {'patch', 'tokens', 'edit_type'}.
        max_tokens_model: Token limit of the target model.
        remaining_files_list_prev: Filenames still waiting to be placed.
        token_handler: Supplies the initial prompt token count and `count_tokens`.

    Returns:
        (total_tokens, patches, remaining_files_list_new, files_in_patch_list).
        Files that exceed the soft budget are returned in `remaining_files_list_new`;
        files met after the hard limit is exceeded are logged and not returned anywhere.
    """
    used_tokens = token_handler.prompt_tokens # initial tokens
    patches = []
    still_remaining = []
    included_files = []

    for filename, entry in file_dict.items():
        if filename not in remaining_files_list_prev:
            continue

        patch, patch_tokens, _edit_type = entry['patch'], entry['tokens'], entry['edit_type']

        # Hard stop: the budget is already exhausted
        if used_tokens > max_tokens_model - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD:
            get_logger().warning(f"File was fully skipped, no more tokens: {filename}.")
            continue

        # Soft stop: this patch does not fit, defer it to a later group
        # TODO: Option for alternative logic to remove hunks from the patch to reduce the number of tokens
        #  until we meet the requirements
        if used_tokens + patch_tokens > max_tokens_model - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
            if get_settings().config.verbosity_level >= 2:
                get_logger().warning(f"Patch too large, skipping it: '{filename}'")
            still_remaining.append(filename)
            continue

        if not patch:
            continue

        if convert_hunks_to_line_numbers:
            rendered = "\n\n" + patch.strip()
        else:
            rendered = f"\n\n## File: '{filename.strip()}'\n\n{patch.strip()}\n"
        patches.append(rendered)
        used_tokens += token_handler.count_tokens(rendered)
        included_files.append(filename)
        if get_settings().config.verbosity_level >= 2:
            get_logger().info(f"Tokens: {used_tokens}, last filename: {filename}")

    return used_tokens, patches, still_remaining, included_files


async def retry_with_fallback_models(f: Callable, model_type: ModelType = ModelType.REGULAR):
    """
    Await `f(model)` for the primary model and then each fallback until one call succeeds.

    Before every attempt the paired deployment id is written to the `openai.deployment_id`
    setting. A failed attempt is logged as a warning; when the last model also fails, an
    Exception chained to the last error is raised.

    Args:
        f: Async callable that receives the model name.
        model_type: Which configured model to use as the primary one.

    Returns:
        Whatever the first successful `f(model)` call returns.
    """
    candidates = _get_all_models(model_type)
    deployments = _get_all_deployments(candidates)

    # walk the (model, deployment_id) pairs in order; the first success wins
    for attempt, pair in enumerate(zip(candidates, deployments)):
        model, deployment_id = pair
        try:
            get_logger().debug(
                f"Generating prediction with {model}"
                f"{(' from deployment ' + deployment_id) if deployment_id else ''}"
            )
            get_settings().set("openai.deployment_id", deployment_id)
            return await f(model)
        except Exception as e:
            get_logger().warning(
                f"Failed to generate prediction with {model}",
                artifact={"error": e},
            )
            is_last_attempt = attempt == len(candidates) - 1
            if is_last_attempt:
                raise Exception(f"Failed to generate prediction with any model of {candidates}") from e


def _get_all_models(model_type: ModelType = ModelType.REGULAR) -> List[str]:
    """
    Return the ordered list of models to try: the primary model followed by the fallbacks.

    Args:
        model_type: WEAK and REASONING resolve the primary model through `get_model`;
            any other value uses `config.model`.

    Returns:
        `[primary] + fallbacks`. `config.fallback_models` may be a list or a comma-separated
        string; blank entries of a string (including a fully empty string) are dropped, and an
        unset value yields no fallbacks.
    """
    if model_type == ModelType.WEAK:
        model = get_model('model_weak')
    elif model_type == ModelType.REASONING:
        model = get_model('model_reasoning')
    else:
        # REGULAR and any unrecognized type both use the main configured model
        model = get_settings().config.model

    fallback_models = get_settings().config.fallback_models
    if not fallback_models:
        # None, "" or [] all mean "no fallbacks"; previously "" produced an empty model name
        fallback_models = []
    elif not isinstance(fallback_models, list):
        # comma-separated string: strip whitespace and ignore empty pieces such as "a,,b"
        fallback_models = [name for name in (m.strip() for m in fallback_models.split(",")) if name]
    all_models = [model] + fallback_models
    return all_models


def _get_all_deployments(all_models: List[str]) -> List[str]:
    deployment_id = get_settings().get("openai.deployment_id", None)
    fallback_deployments = get_settings().get("openai.fallback_deployments", [])
    if not isinstance(fallback_deployments, list) and fallback_deployments:
        fallback_deployments = [d.strip() for d in fallback_deployments.split(",")]
    if fallback_deployments:
        all_deployments = [deployment_id] + fallback_deployments
        if len(all_deployments) < len(all_models):
            raise ValueError(f"The number of deployments ({len(all_deployments)}) "
                             f"is less than the number of models ({len(all_models)})")
    else:
        all_deployments = [deployment_id] * len(all_models)
    return all_deployments


def get_pr_multi_diffs(git_provider: GitProvider,
                       token_handler: TokenHandler,
                       model: str,
                       max_calls: int = 5,
                       add_line_numbers: bool = True) -> List[str]:
    """
    Retrieves the diff files from a Git provider, sorts them by main language, and generates patches for each file.
    The patches are split into multiple groups based on the maximum number of tokens allowed for the given model.

    If the fully extended diff fits within the model's token budget, it is returned as a single string.
    Otherwise, patches (with delete-only hunks removed) are packed into consecutive chunks, each of which
    stays within the budget, until `max_calls` chunks have been produced.

    Args:
        git_provider (GitProvider): An object that provides access to Git provider APIs.
        token_handler (TokenHandler): An object that handles tokens in the context of a pull request.
        model (str): The name of the model.
        max_calls (int, optional): The maximum number of diff chunks to produce. Defaults to 5.
        add_line_numbers (bool, optional): If True, hunks are converted to the line-numbered format;
            otherwise a "## File: ..." header is prepended to each patch. Defaults to True.

    Returns:
        List[str]: A list of final diff strings, split into multiple groups based on the maximum number of tokens allowed for the given model.
            The list is empty when there are no patches to show.

    Raises:
        RateLimitExceededException: If the rate limit for the Git provider API is exceeded.
    """
    try:
        diff_files = git_provider.get_diff_files()
    except RateLimitExceededException as e:
        get_logger().error(f"Rate limit exceeded for git provider API. original message {e}")
        raise

    # Sort files by main language
    pr_languages = sort_files_by_main_languages(git_provider.get_languages(), diff_files)

    # Get the maximum number of extra lines before and after the patch
    PATCH_EXTRA_LINES_BEFORE = get_settings().config.patch_extra_lines_before
    PATCH_EXTRA_LINES_AFTER = get_settings().config.patch_extra_lines_after
    PATCH_EXTRA_LINES_BEFORE = cap_and_log_extra_lines(PATCH_EXTRA_LINES_BEFORE, "before")
    PATCH_EXTRA_LINES_AFTER = cap_and_log_extra_lines(PATCH_EXTRA_LINES_AFTER, "after")

    # try first a single run with standard diff string, with patch extension, and no deletions
    # (this call also stores the token count on each file, which the sort below relies on)
    patches_extended, total_tokens, patches_extended_tokens = pr_generate_extended_diff(
        pr_languages, token_handler,
        add_line_numbers_to_hunks=add_line_numbers,
        patch_extra_lines_before=PATCH_EXTRA_LINES_BEFORE,
        patch_extra_lines_after=PATCH_EXTRA_LINES_AFTER)

    # if we are under the limit, return the full diff
    if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < get_max_tokens(model):
        return ["\n".join(patches_extended)] if patches_extended else []

    # Sort files within each language group by tokens in descending order
    sorted_files = []
    for lang in pr_languages:
        sorted_files.extend(sorted(lang['files'], key=lambda x: x.tokens, reverse=True))

    patches = []  # patches accumulated for the chunk currently being built
    final_diff_list = []  # completed chunks
    total_tokens = token_handler.prompt_tokens  # every chunk starts from the prompt's token count
    call_number = 1
    for file in sorted_files:
        if call_number > max_calls:
            if get_settings().config.verbosity_level >= 2:
                get_logger().info(f"Reached max calls ({max_calls})")
            break

        original_file_content_str = file.base_file
        new_file_content_str = file.head_file
        patch = file.patch
        if not patch:
            continue

        # Remove delete-only hunks
        patch = handle_patch_deletions(patch, original_file_content_str, new_file_content_str, file.filename, file.edit_type)
        if patch is None:
            continue

        # Add line numbers and metadata to the patch
        if add_line_numbers:
            patch = decouple_and_convert_to_hunks_with_lines_numbers(patch, file)
        else:
            patch = f"\n\n## File: '{file.filename.strip()}'\n\n{patch.strip()}\n"

        # add AI-summary metadata to the patch
        if file.ai_file_summary and get_settings().get("config.enable_ai_metadata", False):
            patch = add_ai_summary_top_patch(file, patch)
        new_patch_tokens = token_handler.count_tokens(patch)

        # A patch that cannot fit even in an empty chunk is skipped or clipped, per 'large_patch_policy'
        if patch and (token_handler.prompt_tokens + new_patch_tokens) > get_max_tokens(
                model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
            if get_settings().config.get('large_patch_policy', 'skip') == 'skip':
                get_logger().warning(f"Patch too large, skipping: {file.filename}")
                continue
            elif get_settings().config.get('large_patch_policy') == 'clip':
                # tokens available for a patch in an otherwise empty chunk
                delta_tokens = get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD - token_handler.prompt_tokens
                patch_clipped = clip_tokens(patch, delta_tokens, delete_last_line=True, num_input_tokens=new_patch_tokens)
                new_patch_tokens = token_handler.count_tokens(patch_clipped)
                # re-check: the clipped patch may still be over budget
                if patch_clipped and (token_handler.prompt_tokens + new_patch_tokens) > get_max_tokens(
                        model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD:
                    get_logger().warning(f"Patch too large, skipping: {file.filename}")
                    continue
                else:
                    get_logger().info(f"Clipped large patch for file: {file.filename}")
                    patch = patch_clipped
            else:
                # any other policy value is handled like 'skip'
                get_logger().warning(f"Patch too large, skipping: {file.filename}")
                continue

        # The patch does not fit in the current chunk: close the chunk and start a new one
        if patch and (total_tokens + new_patch_tokens > get_max_tokens(model) - OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD):
            final_diff = "\n".join(patches)
            final_diff_list.append(final_diff)
            patches = []
            total_tokens = token_handler.prompt_tokens
            call_number += 1
            if call_number > max_calls: # avoid creating new patches
                if get_settings().config.verbosity_level >= 2:
                    get_logger().info(f"Reached max calls ({max_calls})")
                break
            if get_settings().config.verbosity_level >= 2:
                get_logger().info(f"Call number: {call_number}")

        if patch:
            patches.append(patch)
            total_tokens += new_patch_tokens
            if get_settings().config.verbosity_level >= 2:
                get_logger().info(f"Tokens: {total_tokens}, last filename: {file.filename}")

    # Add the last chunk
    # NOTE(review): this chunk is stripped while the chunks appended inside the loop are not
    if patches:
        final_diff = "\n".join(patches)
        final_diff_list.append(final_diff.strip())

    return final_diff_list


def add_ai_metadata_to_diff_files(git_provider, pr_description_files):
    """
    Attach the PR-description entry of each file to the matching diff file.

    Every entry of `pr_description_files` is keyed by its stripped 'full_file_name'; each diff
    file whose stripped filename matches gets that entry stored in `ai_file_summary`.
    Failures are logged and never propagated; the function always returns None.
    """
    try:
        if not pr_description_files:
            get_logger().warning("PR description files are empty.")
            return

        # index the description entries by their stripped file name
        summaries_by_name = {}
        for entry in pr_description_files:
            summaries_by_name[entry['full_file_name'].strip()] = entry

        matched_any = False
        for diff_file in git_provider.get_diff_files():
            lookup_name = diff_file.filename.strip()
            if lookup_name not in summaries_by_name:
                continue
            diff_file.ai_file_summary = summaries_by_name[lookup_name]
            matched_any = True

        if not matched_any:
            get_logger().error("Failed to find any matching files between PR description and diff files.",
                               artifact={"pr_description_files": pr_description_files})
    except Exception as e:
        get_logger().error(f"Failed to add AI metadata to diff files: {e}",
                           artifact={"traceback": traceback.format_exc()})


def add_ai_summary_top_patch(file, full_extended_patch):
    """
    Insert the file's AI-generated long summary right below the first '## File:' header line.

    Args:
        file: Object whose `ai_file_summary['long_summary']` holds the summary text.
        full_extended_patch: Patch text, expected to contain a '## File:' (or '## file:') line.

    Returns:
        The patch with the summary inserted, or the patch unchanged when no header line is
        found or when an error occurs (the error is logged).
    """
    try:
        patch_lines = full_extended_patch.split("\n")
        header_markers = ("## File:", "## file:")
        header_index = next(
            (idx for idx, text in enumerate(patch_lines) if text.startswith(header_markers)),
            None,
        )

        # no '## File: ...' header: leave the patch as it is
        if header_index is None:
            return full_extended_patch

        summary_block = f"### AI-generated changes summary:\n{file.ai_file_summary['long_summary']}"
        patch_lines.insert(header_index + 1, summary_block)
        return "\n".join(patch_lines)
    except Exception as e:
        get_logger().error(f"Failed to add AI summary to the top of the patch: {e}",
                           artifact={"traceback": traceback.format_exc()})
        return full_extended_patch


================================================
FILE: pr_agent/algo/token_handler.py
================================================
from threading import Lock
from math import ceil
import re

from jinja2 import Environment, StrictUndefined
from tiktoken import encoding_for_model, get_encoding

from pr_agent.config_loader import get_settings
from pr_agent.log import get_logger


class ModelTypeValidator:
    """Name-based checks that classify a model by provider family."""

    @staticmethod
    def is_openai_model(model_name: str) -> bool:
        """
        Return True when the name contains 'gpt' or is an o-series name such as
        'o1', 'o3-mini' or 'o1-preview' (exact match of o<digit> with an optional suffix).
        """
        # re.match returns a Match object or None; coerce so the declared bool contract holds
        return 'gpt' in model_name or bool(re.match(r"^o[1-9](-mini|-preview)?$", model_name))

    @staticmethod
    def is_anthropic_model(model_name: str) -> bool:
        """Return True when the name contains 'claude'."""
        return 'claude' in model_name


class TokenEncoder:
    """
    Process-wide cache of the tokenizer used for token counting.

    The encoder is rebuilt whenever `config.model` differs from the model the cached
    encoder was built for.
    """
    _encoder_instance = None
    _model = None
    _lock = Lock()  # guards rebuilding of the cached encoder

    @classmethod
    def get_token_encoder(cls):
        """
        Return the cached encoder for the configured model, building it when needed.

        Names containing "gpt" are resolved with `encoding_for_model`; every other model, and
        any model for which that lookup fails, uses the "o200k_base" encoding.
        """
        model = get_settings().config.model
        if cls._encoder_instance is None or model != cls._model:  # cheap check without the lock
            with cls._lock:
                # re-check under the lock: another thread may have rebuilt the encoder meanwhile
                if cls._encoder_instance is None or model != cls._model:
                    try:
                        encoder = encoding_for_model(model) if "gpt" in model else get_encoding(
                            "o200k_base")
                    except Exception:
                        # unknown model name (or lookup failure): use the generic encoding
                        encoder = get_encoding("o200k_base")
                    # Publish only after the encoder was built, encoder first and model second,
                    # so a failed build never leaves a new model name paired with an old encoder.
                    cls._encoder_instance = encoder
                    cls._model = model
        return cls._encoder_instance


class TokenHandler:
    """
    A class for handling tokens in the context of a pull request.

    Attributes:
    - encoder: The tokenizer returned by TokenEncoder.get_token_encoder(). Used to encode strings and count the
      number of tokens in them.
    - prompt_tokens: The number of tokens in the rendered system and user prompts, as calculated by the
      _get_system_user_tokens method. It is 0 when the handler is created without a pull request.
    """

    # Constants
    CLAUDE_MODEL = "claude-3-7-sonnet-20250219"
    CLAUDE_MAX_CONTENT_SIZE = 9_000_000 # Maximum allowed content size (9MB) for Claude API

    def __init__(self, pr=None, vars: dict = {}, system="", user=""):
        """
        Initializes the TokenHandler object.

        Args:
        - pr: The pull request object. When None, no prompt is rendered and prompt_tokens is 0.
        - vars: A dictionary of variables used to render the prompts (read only, never modified).
        - system: The system string.
        - user: The user string.
        """
        self.encoder = TokenEncoder.get_token_encoder()

        # Always define prompt_tokens so that code reading it never hits an AttributeError
        self.prompt_tokens = 0
        if pr is not None:
            self.prompt_tokens = self._get_system_user_tokens(pr, self.encoder, vars, system, user)

    def _get_system_user_tokens(self, pr, encoder, vars: dict, system, user):
        """
        Calculates the number of tokens in the system and user strings.

        Args:
        - pr: The pull request object.
        - encoder: The tokenizer used to encode the rendered prompts.
        - vars: A dictionary of variables.
        - system: The system string.
        - user: The user string.

        Returns:
        The sum of the number of tokens in the system and user strings, or 0 if rendering or
        encoding fails (the error is logged).
        """
        try:
            # StrictUndefined makes a template variable that is missing from `vars` an error
            environment = Environment(undefined=StrictUndefined)
            system_prompt = environment.from_string(system).render(vars)
            user_prompt = environment.from_string(user).render(vars)
            system_prompt_tokens = len(encoder.encode(system_prompt))
            user_prompt_tokens = len(encoder.encode(user_prompt))
            return system_prompt_tokens + user_prompt_tokens
        except Exception as e:
            get_logger().error(f"Error in _get_system_user_tokens: {e}")
            return 0

    def _calc_claude_tokens(self, patch: str) -> int:
        """
        Counts tokens with the Anthropic token-counting API.

        Args:
        - patch: The text to count tokens for.

        Returns:
        The API's input token count. If the content exceeds CLAUDE_MAX_CONTENT_SIZE bytes, or the
        API call fails, the configured model's MAX_TOKENS value is returned instead. If that limit
        could not be resolved either (missing package, unknown model), the local encoder's count
        is returned.
        """
        # Resolved inside the try block; stays None when the failure happens before that point,
        # so the except handler below never references an unbound name.
        max_tokens = None
        try:
            import anthropic
            from pr_agent.algo import MAX_TOKENS

            max_tokens = MAX_TOKENS[get_settings().config.model]

            if len(patch.encode('utf-8')) > self.CLAUDE_MAX_CONTENT_SIZE:
                # NOTE: despite the log wording, the value returned here is the model's max token limit
                get_logger().warning(
                    "Content too large for Anthropic token counting API, falling back to local tokenizer"
                )
                return max_tokens

            client = anthropic.Anthropic(api_key=get_settings(use_context=False).get('anthropic.key'))
            response = client.messages.count_tokens(
                model=self.CLAUDE_MODEL,
                system="system",
                messages=[{
                    "role": "user",
                    "content": patch
                }],
            )
            return response.input_tokens

        except Exception as e:
            get_logger().error(f"Error in Anthropic token counting: {e}")
            if max_tokens is None:
                # the model limit itself is unknown: use the local estimate
                return len(self.encoder.encode(patch, disallowed_special=()))
            return max_tokens

    def _apply_estimation_factor(self, model_name: str, default_estimate: int) -> int:
        """
        Scales the local estimate by (1 + config.model_token_count_estimate_factor), rounded up.

        A warning is logged on every call, since the count for this model cannot be exact.
        """
        factor = 1 + get_settings().get('config.model_token_count_estimate_factor', 0)
        get_logger().warning(f"{model_name}'s token count cannot be accurately estimated. Using factor of {factor}")

        return ceil(factor * default_estimate)

    def _get_token_count_by_model_type(self, patch: str, default_estimate: int) -> int:
        """
        Get token count based on model type.

        Args:
            patch: The text to count tokens for.
            default_estimate: The default token count estimate.

        Returns:
            int: The calculated token count. The local estimate is used as-is for OpenAI models with
            an OpenAI key, the Anthropic API for Claude models with an Anthropic key, and the
            factor-scaled estimate otherwise.
        """
        model_name = get_settings().config.model.lower()

        if ModelTypeValidator.is_openai_model(model_name) and get_settings(use_context=False).get('openai.key'):
            return default_estimate

        if ModelTypeValidator.is_anthropic_model(model_name) and get_settings(use_context=False).get('anthropic.key'):
            return self._calc_claude_tokens(patch)

        return self._apply_estimation_factor(model_name, default_estimate)

    def count_tokens(self, patch: str, force_accurate: bool = False) -> int:
        """
        Counts the number of tokens in a given patch string.

        Args:
        - patch: The patch string.
        - force_accurate: If True, uses a more precise calculation method.

        Returns:
        The number of tokens in the patch string.
        """
        # disallowed_special=() lets text that looks like a special token be encoded as plain text
        encoder_estimate = len(self.encoder.encode(patch, disallowed_special=()))

        # If an estimate is enough (for example, in cases where the maximal allowed tokens is way below the known limits), return it.
        if not force_accurate:
            return encoder_estimate

        return self._get_token_count_by_model_type(patch, encoder_estimate)


================================================
FILE: pr_agent/algo/types.py
================================================
from dataclasses import dataclass
from enum import Enum
from typing import Optional


class EDIT_TYPE(Enum):
    """Kind of change recorded for a file in a diff (stored in `FilePatchInfo.edit_type`)."""
    ADDED = 1
    DELETED = 2
    MODIFIED = 3
    RENAMED = 4
    UNKNOWN = 5  # default value of FilePatchInfo.edit_type


@dataclass
class FilePatchInfo:
    """Per-file record of a pull-request diff: file contents, patch text and derived metadata."""
    # content of the file before the change
    base_file: str
    # content of the file after the change
    head_file: str
    # patch text of the file; may be empty, in which case diff builders skip the file
    patch: str
    filename: str
    # token count of the file's patch; -1 until it has been computed
    tokens: int = -1
    edit_type: EDIT_TYPE = EDIT_TYPE.UNKNOWN
    # presumably the previous path of a renamed file - TODO confirm against the git providers
    old_filename: Optional[str] = None
    # presumably the number of added / removed lines; -1 until computed - TODO confirm
    num_plus_lines: int = -1
    num_minus_lines: int = -1
    language: Optional[str] = None
    # PR-description entry for this file; read as a mapping with a 'long_summary' key
    ai_file_summary: Optional[dict] = None


================================================
FILE: pr_agent/algo/utils.py
================================================
from __future__ import annotations

import ast
import copy
import difflib
import hashlib
import html
import json
import os
import re
import sys
import textwrap
import time
import traceback
from datetime import datetime
from enum import Enum
from importlib.metadata import PackageNotFoundError, version
from typing import Any, List, Tuple, TypedDict

import html2text
import requests
import yaml
from pydantic import BaseModel
from starlette_context import context

from pr_agent.algo import MAX_TOKENS
from pr_agent.algo.git_patch_processing import extract_hunk_lines_from_patch
from pr_agent.algo.token_handler import TokenEncoder
from pr_agent.algo.types import FilePatchInfo
from pr_agent.config_loader import get_settings, global_settings
from pr_agent.log import get_logger


def get_model(model_type: str = "model_weak") -> str:
    """
    Resolve the model name for a model type.

    "model_weak" and "model_reasoning" return their dedicated `config.<model_type>` setting
    when it is set; in every other case the main `config.model` is returned.
    """
    dedicated_types = ("model_weak", "model_reasoning")
    if model_type in dedicated_types and get_settings().get(f"config.{model_type}"):
        return getattr(get_settings().config, model_type)
    # unknown type, or no dedicated model configured
    return get_settings().config.model


class Range(BaseModel):
    """A span of lines, with optional column bounds."""
    line_start: int  # should be 0-indexed
    line_end: int
    # column bounds default to -1; presumably meaning "not specified" - TODO confirm with callers
    column_start: int = -1
    column_end: int = -1


class ModelType(str, Enum):
    """Selects which configured model a request should use as its primary model."""
    REGULAR = "regular"
    WEAK = "weak"
    REASONING = "reasoning"


class TodoItem(TypedDict):
    """Typed dictionary describing one TODO entry: a file, a line range and its text."""
    relevant_file: str
    # pair of line numbers; presumably (start, end) - TODO confirm indexing and inclusiveness
    line_range: Tuple[int, int]
    content: str


class PRReviewHeader(str, Enum):
    """Markdown headings placed at the top of a review comment."""
    REGULAR = "## PR Reviewer Guide"
    INCREMENTAL = "## Incremental PR Reviewer Guide"  # used when the review is incremental


class ReasoningEffort(str, Enum):
    """Names of the supported reasoning-effort levels (string-valued enum)."""
    # NOTE(review): presumably passed to models that accept a reasoning-effort option - confirm at call sites
    XHIGH = "xhigh"
    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
    MINIMAL = "minimal"
    NONE = "none"


class PRDescriptionHeader(str, Enum):
    """Section titles; presumably used in the generated PR description - TODO confirm at call sites."""
    DIAGRAM_WALKTHROUGH = "Diagram Walkthrough"
    FILE_WALKTHROUGH = "File Walkthrough"


def get_setting(key: str) -> Any:
    """
    Look up a setting by its upper-cased key.

    The "settings" object stored in the request context is consulted first (the global
    settings stand in when the context has none), with the global settings value as the
    default. If anything in that lookup raises, the global settings are queried directly.
    Returns None when the key is unknown.
    """
    try:
        key = key.upper()
        active_settings = context.get("settings", global_settings)
        global_value = global_settings.get(key, None)
        return active_settings.get(key, global_value)
    except Exception:
        # e.g. no request context available
        return global_settings.get(key, None)


def emphasize_header(text: str, only_markdown=False, reference_link=None) -> str:
    """
    Emphasize the part of `text` up to and including the first ": " colon.

    Args:
        text: Input of the form "Header: body".
        only_markdown: Emit markdown bold followed by a newline instead of HTML
            <strong> followed by <br>.
        reference_link: When given, the emphasized header is also turned into a link.

    Returns:
        The transformed text, or `text` unchanged when it has no ": " or when an error
        occurs (the error is logged).
    """
    try:
        # locate the first ": "; without it there is nothing to emphasize
        split_at = text.find(": ")
        if split_at == -1:
            return text

        header = text[:split_at + 1]  # includes the colon
        remainder = text[split_at + 1:]

        if only_markdown:
            if reference_link:
                return f"[**{header}**]({reference_link})\n" + remainder
            return f"**{header}**\n" + remainder

        if reference_link:
            return f"<strong><a href='{reference_link}'>{header}</a></strong><br>" + remainder
        return "<strong>" + header + "</strong>" + '<br>' + remainder
    except Exception as e:
        get_logger().exception(f"Failed to emphasize header: {e}")
        return text


def unique_strings(input_list: List[str]) -> List[str]:
    """
    Return the items of `input_list` without duplicates, keeping first-occurrence order.

    An empty value or a value that is not a list is returned unchanged.
    """
    if not (input_list and isinstance(input_list, list)):
        return input_list
    # dict keys are unique and keep insertion order
    return list(dict.fromkeys(input_list))


def convert_to_markdown_v2(output_data: dict,
                           gfm_supported: bool = True,
                           incremental_review=None,
                           git_provider=None,
                           files=None) -> str:
    """
    Render the 'review' section of `output_data` as a markdown/HTML review comment.

    Args:
        output_data (dict): Must contain a non-empty 'review' mapping; each of its items
            becomes one section of the output. NOTE: the 'todo_summary' entry is popped
            from `output_data['review']` (the caller's dict is mutated) and is not used
            afterwards in this function.
        gfm_supported (bool): When True, sections are emitted as rows of an HTML `<table>`;
            otherwise as markdown `###` headings.
        incremental_review: When truthy, the incremental header is used and this value is
            interpolated into the "commits since previous review" line.
        git_provider: Optional provider; when given, its `get_line_link` is used to link
            issues to code, and it is forwarded to `format_todo_items`.
        files: Optional collection forwarded to `extract_relevant_lines_str` to quote the
            code lines an issue refers to.

    Returns:
        str: The formatted review text, or "" when `output_data` is empty or has no
        'review' content.
    """

    # Maps the "nice" section name (key with '_' -> ' ', capitalized) to its emoji.
    emojis = {
        "Can be split": "🔀",
        "Key issues to review": "⚡",
        "Recommended focus areas for review": "⚡",
        "Score": "🏅",
        "Relevant tests": "🧪",
        "Focused PR": "✨",
        "Relevant ticket": "🎫",
        "Security concerns": "🔒",
        "Todo sections": "📝",
        "Insights from user's answers": "📝",
        "Code feedback": "🤖",
        "Estimated effort to review [1-5]": "⏱️",
        "Contribution time cost estimate": "⏳",
        "Ticket compliance check": "🎫",
    }
    markdown_text = ""
    if not incremental_review:
        markdown_text += f"{PRReviewHeader.REGULAR.value} 🔍\n\n"
    else:
        markdown_text += f"{PRReviewHeader.INCREMENTAL.value} 🔍\n\n"
        markdown_text += f"⏮️ Review for commits since previous PR-Agent review {incremental_review}.\n\n"
    # Nothing to render: the header built above is discarded.
    if not output_data or not output_data.get('review', {}):
        return ""

    if get_settings().get("pr_reviewer.enable_intro_text", False):
        markdown_text += f"Here are some key observations to aid the review process:\n\n"

    if gfm_supported:
        markdown_text += "<table>\n"

    # Removed from the review dict so it is not rendered by the loop below (value unused here).
    todo_summary = output_data['review'].pop('todo_summary', '')
    for key, value in output_data['review'].items():
        # Empty values are skipped, except for the two keys whose "empty" state is itself
        # reported ("No multiple PR themes" / "No major issues detected").
        if value is None or value == '' or value == {} or value == []:
            if key.lower() not in ['can_be_split', 'key_issues_to_review']:
                continue
        key_nice = key.replace('_', ' ').capitalize()
        emoji = emojis.get(key_nice, "")
        if 'Estimated effort to review' in key_nice:
            key_nice = 'Estimated effort to review'
            value = str(value).strip()
            if value.isnumeric():
                value_int = int(value)
            else:
                # Accept values such as "3, because ..." by taking the part before the comma.
                try:
                    value_int = int(value.split(',')[0])
                except ValueError:
                    # Unparseable effort: the whole section is dropped.
                    continue
            # Effort is drawn as filled/empty dots on a 5-slot scale.
            blue_bars = '🔵' * value_int
            white_bars = '⚪' * (5 - value_int)
            value = f"{value_int} {blue_bars}{white_bars}"
            if gfm_supported:
                markdown_text += f"<tr><td>"
                markdown_text += f"{emoji}&nbsp;<strong>{key_nice}</strong>: {value}"
                markdown_text += f"</td></tr>\n"
            else:
                markdown_text += f"### {emoji} {key_nice}: {value}\n\n"
        elif 'relevant tests' in key_nice.lower():
            value = str(value).strip().lower()
            if gfm_supported:
                markdown_text += f"<tr><td>"
                if is_value_no(value):
                    markdown_text += f"{emoji}&nbsp;<strong>No relevant tests</strong>"
                else:
                    markdown_text += f"{emoji}&nbsp;<strong>PR contains tests</strong>"
                markdown_text += f"</td></tr>\n"
            else:
                if is_value_no(value):
                    markdown_text += f'### {emoji} No relevant tests\n\n'
                else:
                    markdown_text += f"### {emoji} PR contains tests\n\n"
        elif 'ticket compliance check' in key_nice.lower():
            # The helper receives the text built so far and returns it with its section appended.
            markdown_text = ticket_markdown_logic(emoji, markdown_text, value, gfm_supported)
        elif 'contribution time cost estimate' in key_nice.lower():
            # `value` is indexed by 'best_case' / 'average_case' / 'worst_case'; every 'm' in
            # those strings is replaced by ' minutes'.
            if gfm_supported:
                markdown_text += f"<tr><td>{emoji}&nbsp;<strong>Contribution time estimate</strong> (best, average, worst case): "
                markdown_text += f"{value['best_case'].replace('m', ' minutes')} | {value['average_case'].replace('m', ' minutes')} | {value['worst_case'].replace('m', ' minutes')}"
                markdown_text += f"</td></tr>\n"
            else:
                markdown_text += f"### {emoji} Contribution time estimate (best, average, worst case): "
                markdown_text += f"{value['best_case'].replace('m', ' minutes')} | {value['average_case'].replace('m', ' minutes')} | {value['worst_case'].replace('m', ' minutes')}\n\n"
        elif 'security concerns' in key_nice.lower():
            if gfm_supported:
                markdown_text += f"<tr><td>"
                if is_value_no(value):
                    markdown_text += f"{emoji}&nbsp;<strong>No security concerns identified</strong>"
                else:
                    markdown_text += f"{emoji}&nbsp;<strong>Security concerns</strong><br><br>\n\n"
                    value = emphasize_header(value.strip())
                    markdown_text += f"{value}"
                markdown_text += f"</td></tr>\n"
            else:
                if is_value_no(value):
                    markdown_text += f'### {emoji} No security concerns identified\n\n'
                else:
                    markdown_text += f"### {emoji} Security concerns\n\n"
                    value = emphasize_header(value.strip(), only_markdown=True)
                    markdown_text += f"{value}\n\n"
        elif 'todo sections' in key_nice.lower():
            if gfm_supported:
                markdown_text += "<tr><td>"
                if is_value_no(value):
                    markdown_text += f"✅&nbsp;<strong>No TODO sections</strong>"
                else:
                    markdown_todo_items = format_todo_items(value, git_provider, gfm_supported)
                    markdown_text += f"{emoji}&nbsp;<strong>TODO sections</strong>\n<br><br>\n"
                    markdown_text += markdown_todo_items
                markdown_text += "</td></tr>\n"
            else:
                if is_value_no(value):
                    markdown_text += f"### ✅ No TODO sections\n\n"
                else:
                    markdown_todo_items = format_todo_items(value, git_provider, gfm_supported)
                    markdown_text += f"### {emoji} TODO sections\n\n"
                    markdown_text += markdown_todo_items
        elif 'can be split' in key_nice.lower():
            # NOTE: this section is only rendered in GFM mode; without GFM nothing is emitted.
            if gfm_supported:
                markdown_text += f"<tr><td>"
                markdown_text += process_can_be_split(emoji, value)
                markdown_text += f"</td></tr>\n"
        elif 'key issues to review' in key_nice.lower():
            # value is a list of issues
            if is_value_no(value):
                if gfm_supported:
                    markdown_text += f"<tr><td>"
                    markdown_text += f"{emoji}&nbsp;<strong>No major issues detected</strong>"
                    markdown_text += f"</td></tr>\n"
                else:
                    markdown_text += f"### {emoji} No major issues detected\n\n"
            else:
                issues = value
                if gfm_supported:
                    markdown_text += f"<tr><td>"
                    # markdown_text += f"{emoji}&nbsp;<strong>{key_nice}</strong><br><br>\n\n"
                    markdown_text += f"{emoji}&nbsp;<strong>Recommended focus areas for review</strong><br><br>\n\n"
                else:
                    markdown_text += f"### {emoji} Recommended focus areas for review\n\n#### \n"
                for i, issue in enumerate(issues):
                    # Each issue is rendered independently; a failure is logged and the
                    # remaining issues are still processed.
                    try:
                        if not issue or not isinstance(issue, dict):
                            continue
                        relevant_file = issue.get('relevant_file', '').strip()
                        issue_header = issue.get('issue_header', '').strip()
                        if issue_header.lower() == 'possible bug':
                            issue_header = 'Possible Issue'  # Make the header less frightening
                        issue_content = issue.get('issue_content', '').strip()
                        # Line numbers may arrive as strings; a non-numeric value raises and
                        # is handled by the except below.
                        start_line = int(str(issue.get('start_line', 0)).strip())
                        end_line = int(str(issue.get('end_line', 0)).strip())

                        relevant_lines_str = extract_relevant_lines_str(end_line, files, relevant_file, start_line, dedent=True)
                        if git_provider:
                            reference_link = git_provider.get_line_link(relevant_file, start_line, end_line)
                        else:
                            reference_link = None

                        if gfm_supported:
                            if reference_link is not None and len(reference_link) > 0:
                                if relevant_lines_str:
                                    # Collapsible block: linked header + description, code inside.
                                    issue_str = f"<details><summary><a href='{reference_link}'><strong>{issue_header}</strong></a>\n\n{issue_content}\n</summary>\n\n{relevant_lines_str}\n\n</details>"
                                else:
                                    issue_str = f"<a href='{reference_link}'><strong>{issue_header}</strong></a><br>{issue_content}"
                            else:
                                issue_str = f"<strong>{issue_header}</strong><br>{issue_content}"
                        else:
                            if reference_link is not None and len(reference_link) > 0:
                                issue_str = f"[**{issue_header}**]({reference_link})\n\n{issue_content}\n\n"
                            else:
                                issue_str = f"**{issue_header}**\n\n{issue_content}\n\n"
                        markdown_text += f"{issue_str}\n\n"
                    except Exception as e:
                        get_logger().exception(f"Failed to process 'Recommended focus areas for review': {e}")
                if gfm_supported:
                    markdown_text += f"</td></tr>\n"
        else:
            # Any other key is rendered generically as "<name>: <value>".
            if gfm_supported:
                markdown_text += f"<tr><td>"
                markdown_text += f"{emoji}&nbsp;<strong>{key_nice}</strong>: {value}"
                markdown_text += f"</td></tr>\n"
            else:
                markdown_text += f"### {emoji} {key_nice}: {value}\n\n"

    if gfm_supported:
        markdown_text += "</table>\n"

    return markdown_text


def extract_relevant_lines_str(end_line, files, relevant_file, start_line, dedent=False) -> str:
    """
    Return lines `start_line`..`end_line` of `relevant_file` as a fenced code block.

    The first entry of `files` whose stripped filename equals `relevant_file` is used.
    Lines are taken from its `head_file` content when available; otherwise they are
    recovered from its patch (right side), skipping removed ('-') lines and dropping the
    leading diff marker of the rest. With `dedent=True` the common leading whitespace
    is removed.

    Returns "" when `files` is empty, no file matches, the patch yields no lines, or
    any error occurs.
    """
    try:
        if not files:
            return ""

        for candidate in set_file_languages(files):
            if candidate.filename.strip() != relevant_file:
                continue

            if candidate.head_file:
                head_lines = candidate.head_file.splitlines()
                snippet = "\n".join(head_lines[start_line - 1:end_line])
            else:
                # as a fallback, extract relevant lines directly from patch
                patch = candidate.patch
                get_logger().info(f"No content found in file: '{candidate.filename}' for 'extract_relevant_lines_str'. Using patch instead")
                _, selected_lines = extract_hunk_lines_from_patch(patch, candidate.filename, start_line, end_line, side='right')
                if not selected_lines:
                    get_logger().error(f"Failed to extract relevant lines from patch: {candidate.filename}")
                    return ""
                # Keep added/context lines only, without their one-character diff marker.
                snippet = "".join(
                    patch_line[1:] + '\n'
                    for patch_line in selected_lines.splitlines()
                    if not patch_line.startswith('-')
                )

            if dedent and snippet:
                # Remove the longest leading string of spaces and tabs common to all lines.
                snippet = textwrap.dedent(snippet)
            # Only the first matching file is used.
            return f"```{candidate.language}\n{snippet}\n```"

        return ""
    except Exception as e:
        get_logger().exception(f"Failed to extract relevant lines: {e}")
        return ""


def ticket_markdown_logic(emoji, markdown_text, value, gfm_supported) -> str:
    """
    Append the "Ticket compliance analysis" section to `markdown_text` and return it.

    `value` is expected to be a list of per-ticket analysis dicts; any other type
    leaves `markdown_text` unchanged. Each ticket gets a compliance level derived from
    which of its requirement strings are non-empty, and the levels of all tickets are
    combined into one overall level, which selects the emoji shown in the section title
    and is stored in the settings under 'config.extra_statistics'.

    Tickets with neither compliant nor non-compliant requirements are skipped, as are
    tickets whose processing raises (the error is logged).
    """
    if not isinstance(value, list):
        return markdown_text

    per_ticket_sections = ""
    overall_emoji = ''
    levels_seen = []  # one entry per ticket that was actually rendered

    for ticket_analysis in value:
        try:
            url = ticket_analysis.get('ticket_url', '').strip()
            met = ticket_analysis.get('fully_compliant_requirements', '').strip()
            unmet = ticket_analysis.get('not_compliant_requirements', '').strip()
            needs_human = ticket_analysis.get('requires_further_human_verification',
                                              '').strip()

            if not (met or unmet):
                get_logger().debug("Ticket compliance has no requirements",
                                   artifact={'ticket_url': url})
                continue

            # Per-ticket level (at least one of `met` / `unmet` is non-empty here).
            if met and unmet:
                level = 'Partially compliant'
            elif met:
                level = 'PR Code Verified' if needs_human else 'Fully compliant'
            else:
                level = 'Not compliant'
            levels_seen.append(level)

            # Explanation: one paragraph per non-empty requirement group.
            paragraphs = []
            if met:
                paragraphs.append(f"Compliant requirements:\n\n{met}\n\n")
            if unmet:
                paragraphs.append(f"Non-compliant requirements:\n\n{unmet}\n\n")
            if needs_human:
                paragraphs.append(f"Requires further human verification:\n\n{needs_human}\n\n")
            explanation = "".join(paragraphs)
            per_ticket_sections += f"\n\n**[{url.split('/')[-1]}]({url}) - {level}**\n\n{explanation}\n\n"

            # for debugging
            if needs_human:
                get_logger().debug("Ticket compliance requires further human verification",
                                   artifact={'ticket_url': url,
                                             'requires_further_human_verification': needs_human,
                                             'compliance_level': level})

        except Exception as e:
            get_logger().exception(f"Failed to process ticket compliance: {e}")
            continue

    # Combine the per-ticket levels into one overall level and emoji.
    if levels_seen:
        distinct = set(levels_seen)
        if distinct == {'Fully compliant'}:
            overall_level, overall_emoji = 'Fully compliant', '✅'
        elif distinct == {'PR Code Verified'}:
            overall_level, overall_emoji = 'PR Code Verified', '✅'
        elif 'Not compliant' in distinct:
            # A non-compliant ticket next to a (code-)compliant one counts as partial.
            if distinct & {'Fully compliant', 'PR Code Verified'}:
                overall_level, overall_emoji = 'Partially compliant', '🔶'
            else:
                overall_level, overall_emoji = 'Not compliant', '❌'
        elif 'Partially compliant' in distinct:
            overall_level, overall_emoji = 'Partially compliant', '🔶'
        else:
            # Remaining case: a mix of 'Fully compliant' and 'PR Code Verified'.
            overall_level, overall_emoji = 'PR Code Verified', '✅'

        # Set extra statistics outside the ticket loop
        get_settings().set('config.extra_statistics', {'compliance_level': overall_level})

    # The section is emitted even when no ticket produced any content.
    if gfm_supported:
        markdown_text += "<tr><td>\n\n"
        markdown_text += f"**{emoji} Ticket compliance analysis {overall_emoji}**\n\n"
        markdown_text += per_ticket_sections
        markdown_text += "</td></tr>\n"
    else:
        markdown_text += f"### {emoji} Ticket compliance analysis {overall_emoji}\n\n"
        markdown_text += per_ticket_sections + "\n\n"

    return markdown_text


def process_can_be_split(emoji, value):
    """
    Render the "Multiple PR themes" section.

    `value` is expected to be a list of dicts with a 'title' and a 'relevant_files'
    list. An empty value, or a list with a single theme, is reported as
    "No multiple PR themes"; otherwise every theme becomes a collapsible
    `<details>` block listing its files.

    Returns "" (after logging) if rendering fails.
    """
    try:
        has_single_theme = isinstance(value, list) and len(value) == 1
        if not value or has_single_theme:
            return f"{emoji} <strong>No multiple PR themes</strong>\n\n"

        chunks = [f"{emoji} <strong>Multiple PR themes</strong><br><br>\n\n"]
        for theme in value:
            title = theme.get('title', '')
            relevant_files = theme.get('relevant_files', [])
            file_bullets = "".join(f"- {path}\n" for path in relevant_files)
            chunks.append(
                f"<details><summary>\nSub-PR theme: <b>{title}</b></summary>\n\n"
                "___\n\nRelevant files:\n\n"
                f"{file_bullets}"
                "___\n\n"
                "</details>\n\n"
            )
        return "".join(chunks)
    except Exception as e:
        get_logger().exception(f"Failed to process can be split: {e}")
        return ""


def parse_code_suggestion(code_suggestion: dict, i: int = 0, gfm_supported: bool = True) -> str:
    """
    Render a single code suggestion as markdown/HTML.

    Args:
        code_suggestion (dict): The suggestion fields to render.
        i (int): Not used by this function.
        gfm_supported (bool): When True and the suggestion has a 'relevant_line' key,
            an HTML table is produced; otherwise a markdown bullet layout is used.

    Returns:
        str: The rendered suggestion.
    """
    if gfm_supported and 'relevant_line' in code_suggestion:
        # Table layout: only 'relevant_file', 'suggestion' and 'relevant_line' are shown.
        rendered = '<table>'
        for field, content in code_suggestion.items():
            try:
                field_name = field.lower()
                if field_name == 'relevant_file':
                    cleaned_path = content.strip('`').strip('"').strip("'")
                    rendered += f"<tr><td>relevant file</td><td>{cleaned_path}</td></tr>"
                elif field_name == 'suggestion':
                    rendered += (f"<tr><td>{field} &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</td>"
                                 f"<td>\n\n<strong>\n\n{content.strip()}\n\n</strong>\n</td></tr>")
                elif field_name == 'relevant_line':
                    rendered += "<tr><td>relevant line</td>"
                    # The value may be a markdown link "[text](url)"; split it at "](".
                    text_part, *link_parts = content.split('](')
                    line_text = text_part.lstrip('`').lstrip('[')
                    if link_parts:
                        href = link_parts[0].rstrip(')').strip('`')
                        rendered += f"<td><a href='{href}'>{line_text}</a></td>"
                    else:
                        rendered += f"<td>{line_text}</td>"
                    rendered += "</tr>"
            except Exception as e:
                # A broken field is logged and skipped; the others are still rendered.
                get_logger().exception(f"Failed to parse code suggestion: {e}")
        return rendered + '</table>' + "<hr>"

    rendered = ""
    for field, content in code_suggestion.items():
        if isinstance(field, str):
            field = field.rstrip()
        if isinstance(content, str):
            content = content.rstrip()

        if isinstance(content, dict):  # "code example"
            rendered += f"  - **{field}:**\n"
            for variant, snippet in content.items():  # 'before' and 'after' code
                fenced = textwrap.indent(f"```\n{snippet}\n```", '        ')
                rendered += f"    - **{variant}:**\n{fenced}\n"
            continue

        field_name = field.lower()
        if "relevant_file" in field_name:
            rendered += f"\n  - **{field}:** {content}  \n"
        else:
            rendered += f"   **{field}:** {content}  \n"
        if "relevant_line" not in field_name:  # nicer presentation
            # Extra trailing spaces before the newline; works for gitlab and bitbucket.
            rendered = rendered.rstrip('\n') + "   \n"

    return rendered + "\n"


def try_fix_json(review, max_iter=10, code_suggestions=False):
    """
    Fix broken or incomplete JSON messages and return the parsed JSON data.

    Args:
    - review: A string containing the JSON message to be fixed.
    - max_iter: An integer representing the maximum number of iterations to try and fix the JSON message.
    - code_suggestions: A boolean selecting the closing sequence appended to a truncated message:
      "]}" when True, "]}}" otherwise.

    Returns:
    - data: A dictionary containing the parsed JSON data, or an empty dictionary when the message
      could not be repaired.

    If the message ends with "}", it is handed to fix_json_escape_char and that result is returned.
    Otherwise, when the message contains a "Code feedback" or "Code suggestions" list, it is assumed
    to be truncated inside that list: the text is cut right before the comma of the last "}," (any
    whitespace allowed between the two characters), the closing sequence is appended and parsing is
    attempted. On failure the last suggestion is dropped and the process repeats until parsing
    succeeds, no "}," is left, or `max_iter` attempts were made. If no attempt succeeds, an error is
    logged and an empty dictionary is returned.
    """

    if review.endswith("}"):
        return fix_json_escape_char(review)

    data = {}
    closing_bracket = "]}" if code_suggestions else "]}}"

    def _last_suggestion_end(text):
        # Index of the comma that ends the last "}," in `text`, or -1 when there is none.
        # (Indexing [-1] into the list of matches raised IndexError when nothing matched.)
        last_comma = -1
        for match in re.finditer(r"\}\s*,", text):
            last_comma = match.end() - 1
        return last_comma

    list_markers = ("'Code feedback': [", '"Code feedback": [',
                    "'Code suggestions': [", '"Code suggestions": [')
    if any(review.rfind(marker) > 0 for marker in list_markers):
        last_code_suggestion_ind = _last_suggestion_end(review)
        valid_json = False
        iter_count = 0

        while last_code_suggestion_ind > 0 and not valid_json and iter_count < max_iter:
            try:
                data = json.loads(review[:last_code_suggestion_ind] + closing_bracket)
                valid_json = True
            except json.decoder.JSONDecodeError:
                # Drop the last (broken) suggestion and retry with the previous one.
                review = review[:last_code_suggestion_ind]
                last_code_suggestion_ind = _last_suggestion_end(review)
                iter_count += 1

        if not valid_json:
            get_logger().error("Unable to decode JSON response from AI")
            data = {}

    return data


def fix_json_escape_char(json_message=None):
    """
    Parse a JSON message, blanking out the characters that make it invalid.

    The message is parsed with `json.loads`. Whenever parsing fails, the character at the
    position reported by the decoder is replaced with a space and parsing is retried, until
    the message parses.

    This is done in a loop rather than recursively, so messages containing many offending
    characters do not exhaust the recursion limit, and the error position is read from
    `JSONDecodeError.pos` instead of being parsed out of the error text.

    Args:
        json_message (str): A string containing the JSON message to be fixed.

    Returns:
        The parsed JSON data (a dict for a JSON object).

    Raises:
        IndexError: If the decoder reports a position past the end of the message
            (e.g. the message is truncated), since there is no character left to blank out.
        Exception: Errors raised by `json.loads` other than `JSONDecodeError`
            (e.g. `TypeError` for a non-string message) are propagated unchanged.
    """
    chars = None  # mutable copy of the message, created on the first failure only
    while True:
        try:
            return json.loads(json_message)
        except json.JSONDecodeError as e:
            if chars is None:
                chars = list(json_message)
            # Remove the offending character:
            chars[e.pos] = ' '
            json_message = ''.join(chars)


def convert_str_to_datetime(date_str):
    """
    Parse a date/time string of the form '%a, %d %b %Y %H:%M:%S %Z' into a datetime.

    Args:
        date_str (str): The text to parse, e.g. 'Sat, 01 Jan 2022 12:00:00 UTC'.

    Returns:
        datetime: The parsed date and time, as produced by `datetime.strptime`.

    Example:
        >>> convert_str_to_datetime('Sat, 01 Jan 2022 12:00:00 UTC')
        datetime.datetime(2022, 1, 1, 12, 0)
    """
    return datetime.strptime(date_str, '%a, %d %b %Y %H:%M:%S %Z')


def load_large_diff(filename, new_file_content_str: str, original_file_content_str: str, show_warning: bool = True) -> str:
    """
    Build a unified diff between the original and the new content of a file.

    Both contents are right-stripped and terminated with a single newline before being
    compared; a missing (None/empty) side is treated as an empty file.

    Returns "" when both contents are empty or when generating the diff fails.
    """
    if not (original_file_content_str or new_file_content_str):
        return ""

    try:
        old_text = (original_file_content_str or "").rstrip() + "\n"
        new_text = (new_file_content_str or "").rstrip() + "\n"
        diff_chunks = difflib.unified_diff(old_text.splitlines(keepends=True),
                                           new_text.splitlines(keepends=True))
        if get_settings().config.verbosity_level >= 2 and show_warning:
            get_logger().info(f"File was modified, but no patch was found. Manually creating patch: {filename}.")
        return ''.join(diff_chunks)
    except Exception as e:
        get_logger().exception(f"Failed to generate patch for file: {filename}")
        return ""


def update_settings_from_args(args: List[str]) -> List[str]:
    """
    Apply '--key=value' arguments to the settings and return the remaining arguments.

    Every argument is stripped of surrounding whitespace. Arguments of the form
    '--key=value' are split at the first '=' (so the value may itself contain '='),
    normalized with `_fix_key_value` and written to the settings. Everything else is
    collected and returned: plain arguments as they are, and '--flag' arguments
    (no '=') without their leading dashes.

    Args:
        args: A list of arguments.
        Example args: ['--pr_code_suggestions.extra_instructions="be funny"',
                  '--pr_code_suggestions.num_code_suggestions=3']

    Returns:
        List[str]: The arguments that were not applied as settings, in their original order.
    """
    other_args = []
    if not args:
        return other_args

    for arg in args:
        arg = arg.strip()
        if not arg.startswith('--'):
            other_args.append(arg)
            continue

        # Only the leading dashes are option syntax; trailing dashes belong to the value
        # and must be kept.
        arg = arg.lstrip('-').strip()
        key, separator, value = arg.partition('=')
        if not separator:
            # No '=': a bare flag such as --extended is a valid argument.
            other_args.append(arg)
            continue

        key, value = _fix_key_value(key, value)
        get_settings().set(key, value)
        get_logger().info(f'Updated setting {key} to: "{value}"')
    return other_args


def _fix_key_value(key: str, value: str):
    """
    Normalize a settings override pair.

    The key is stripped and upper-cased. The stripped value is parsed with
    `yaml.safe_load`; if parsing fails the stripped string is kept as is
    (the failure is logged at debug level).

    Returns:
        tuple: (normalized key, parsed value).
    """
    normalized_key = key.strip().upper()
    raw_value = value.strip()
    try:
        parsed_value = yaml.safe_load(raw_value)
    except Exception as e:
        get_logger().debug(f"Failed to parse YAML for config override {normalized_key}={raw_value}", exc_info=e)
        parsed_value = raw_value
    return normalized_key, parsed_value


def load_yaml(response_text: str, keys_fix_yaml: List[str] = [], first_key="", last_key="") -> dict:
    """
    Parse a YAML response, falling back to `try_fix_yaml` when direct parsing fails.

    Before parsing, surrounding newlines, a leading 'yaml' / '```yaml' marker, trailing
    whitespace and a closing '```' fence are removed from `response_text`.

    Args:
        response_text: The raw text to parse.
        keys_fix_yaml, first_key, last_key: Forwarded to `try_fix_yaml` if the direct
            parse fails.

    Returns:
        The parsed data, or whatever `try_fix_yaml` returned when the fallback was used.
    """
    untouched_text = copy.deepcopy(response_text)

    cleaned = response_text.strip('\n')
    cleaned = cleaned.removeprefix('yaml').removeprefix('```yaml')
    cleaned = cleaned.rstrip().removesuffix('```')

    try:
        data = yaml.safe_load(cleaned)
    except Exception as e:
        get_logger().warning(f"Initial failure to parse AI prediction: {e}")
        data = try_fix_yaml(cleaned,
                            keys_fix_yaml=keys_fix_yaml,
                            first_key=first_key,
                            last_key=last_key,
                            response_text_original=untouched_text)
        if data:
            get_logger().info("Successfully parsed AI prediction after fallbacks",
                              artifact={'response_text': cleaned})
        else:
            get_logger().error("Failed to parse AI prediction after fallbacks",
                               artifact={'response_text': cleaned})
    return data


def try_fix_yaml(response_text: str,
                 keys_fix_yaml: List[str] = [],
                 first_key="",
                 last_key="",
                 response_text_original="") -> dict:
    """
    Best-effort repair of a model response that failed to parse as YAML.

    A sequence of heuristics is tried in order. Each one rewrites the text in some way and
    feeds it to `yaml.safe_load`; the result of the first attempt that does not raise is
    returned. Failures of individual attempts are intentionally ignored so the next
    heuristic can run.

    Args:
        response_text: The (already unwrapped) text that failed to parse.
        keys_fix_yaml: Additional key markers (e.g. 'my_key:') that should be treated like the
            built-in ones when converting values to block scalars. The list is not mutated.
        first_key: Name of the first key of the expected YAML snippet (fourth fallback).
        last_key: Name of a key located inside the expected snippet; the snippet is assumed
            to end at the first blank line after its last occurrence (fourth fallback).
        response_text_original: The untouched response, used by the second fallback to look
            for a fenced ```yaml block that may have been stripped from `response_text`.

    Returns:
        The parsed data of the first successful fallback, or None if all of them failed.
    """
    response_text_lines = response_text.split('\n')

    keys_yaml = ['relevant line:', 'suggestion content:', 'relevant file:', 'existing code:',
                 'improved code:', 'label:', 'why:', 'suggestion_summary:']
    keys_yaml = keys_yaml + keys_fix_yaml

    # first fallback - try to convert 'relevant line: ...' to relevant line: |-\n        ...'
    response_text_lines_copy = response_text_lines.copy()
    for i in range(0, len(response_text_lines_copy)):
        for key in keys_yaml:
            if key in response_text_lines_copy[i] and not '|' in response_text_lines_copy[i]:
                response_text_lines_copy[i] = response_text_lines_copy[i].replace(f'{key}',
                                                                                  f'{key} |\n        ')
    try:
        data = yaml.safe_load('\n'.join(response_text_lines_copy))
        get_logger().info(f"Successfully parsed AI prediction after adding |-\n")
        return data
    except Exception:
        pass

    # 1.5 fallback - try to convert '|' to '|2'. Will solve cases of indent decreasing during the code
    # (strings are immutable, so no defensive copy is needed before calling replace)
    response_text_copy = response_text.replace('|\n', '|2\n')
    try:
        data = yaml.safe_load(response_text_copy)
        get_logger().info(f"Successfully parsed AI prediction after replacing | with |2")
        return data
    except Exception:
        # if it fails, we can try to add spaces to the lines that are not indented properly, and contain '}'.
        response_text_lines_copy = response_text_copy.split('\n')
        for i in range(0, len(response_text_lines_copy)):
            initial_space = len(response_text_lines_copy[i]) - len(response_text_lines_copy[i].lstrip())
            if initial_space == 2 and '|2' not in response_text_lines_copy[i] and '}' in response_text_lines_copy[i]:
                response_text_lines_copy[i] = '    ' + response_text_lines_copy[i].lstrip()
        try:
            data = yaml.safe_load('\n'.join(response_text_lines_copy))
            get_logger().info(f"Successfully parsed AI prediction after replacing | with |2 and adding spaces")
            return data
        except Exception:
            pass

    # second fallback - try to extract only range from first ```yaml to the last ```
    snippet_pattern = r'```yaml([\s\S]*?)```(?=\s*$|")'
    snippet = re.search(snippet_pattern, '\n'.join(response_text_lines_copy))
    if not snippet:
        snippet = re.search(snippet_pattern, response_text_original)  # before we removed the "```"
    if snippet:
        snippet_text = snippet.group()
        try:
            data = yaml.safe_load(snippet_text.removeprefix('```yaml').rstrip('`'))
            get_logger().info(f"Successfully parsed AI prediction after extracting yaml snippet")
            return data
        except Exception:
            pass

    # third fallback - try to remove leading and trailing curly brackets
    response_text_copy = response_text.strip().rstrip().removeprefix('{').removesuffix('}').rstrip(':\n')
    try:
        data = yaml.safe_load(response_text_copy)
        get_logger().info(f"Successfully parsed AI prediction after removing curly brackets")
        return data
    except Exception:
        pass

    # fourth fallback - try to extract yaml snippet by 'first_key' and 'last_key'
    # note that 'last_key' can be in practice a key that is not the last key in the yaml snippet.
    # it just needs to be some inner key, so we can look for newlines after it
    if first_key and last_key:
        index_start = response_text.find(f"\n{first_key}:")
        if index_start == -1:
            index_start = response_text.find(f"{first_key}:")
        # Only extract when first_key is really present. With index_start == -1 the slice below
        # would yield just the last character of the text, which can "parse" as a bogus scalar.
        if index_start != -1:
            index_last_code = response_text.rfind(f"{last_key}:")
            index_end = response_text.find("\n\n", index_last_code)  # look for newlines after last_key
            if index_end == -1:
                index_end = len(response_text)
            # str.strip(chars) treats its argument as a *set of characters*, so stripping
            # '```yaml' would also eat leading/trailing 'y', 'a', 'm', 'l' of real content
            # (e.g. 'labels:' -> 'bels:'). Remove the fence marker as a prefix instead.
            response_text_copy = response_text[index_start:index_end].strip()
            response_text_copy = response_text_copy.removeprefix('```yaml').strip('`').strip()
            if response_text_copy:
                try:
                    data = yaml.safe_load(response_text_copy)
                    get_logger().info(f"Successfully parsed AI prediction after extracting yaml snippet")
                    return data
                except Exception:
                    pass

    # fifth fallback - try to remove leading '+' (sometimes added by AI for 'existing code' and 'improved code')
    response_text_lines_copy = response_text_lines.copy()
    for i in range(0, len(response_text_lines_copy)):
        if response_text_lines_copy[i].startswith('+'):
            response_text_lines_copy[i] = ' ' + response_text_lines_copy[i][1:]
    try:
        data = yaml.safe_load('\n'.join(response_text_lines_copy))
        get_logger().info(f"Successfully parsed AI prediction after removing leading '+'")
        return data
    except Exception:
        pass

    # sixth fallback - replace tabs with spaces
    if '\t' in response_text:
        response_text_copy = response_text.replace('\t', '    ')
        try:
            data = yaml.safe_load(response_text_copy)
            get_logger().info(f"Successfully parsed AI prediction after replacing tabs with spaces")
            return data
        except Exception:
            pass

    # seventh fallback - add indent for sections of code blocks
    response_text_copy_lines = response_text.split('\n')
    start_line = -1  # -1 means "not currently inside a section whose body should be indented"
    improve_sections = ['existing_code:', 'improved_code:', 'response:', 'why:']
    describe_sections = ['description:', 'title:', 'changes_diagram:', 'pr_files:', 'pr_ticket:']
    for i, line in enumerate(response_text_copy_lines):
        line_stripped = line.rstrip()
        if any(key in line_stripped for key in (improve_sections+describe_sections)):
            start_line = i
        elif line_stripped.endswith(': |') or line_stripped.endswith(': |-') or line_stripped.endswith(': |2') or any(line_stripped.endswith(key) for key in keys_yaml):
            start_line = -1
        elif start_line != -1:
            response_text_copy_lines[i] = '    ' + line
    response_text_copy = '\n'.join(response_text_copy_lines)
    response_text_copy = response_text_copy.replace(' |\n', ' |2\n')
    try:
        data = yaml.safe_load(response_text_copy)
        get_logger().info(f"Successfully parsed AI prediction after adding indent for sections of code blocks")
        return data
    except Exception:
        pass

    # eighth fallback - try to remove pipe chars at the root-level dicts
    response_text_copy = response_text.lstrip('|\n')
    try:
        data = yaml.safe_load(response_text_copy)
        get_logger().info(f"Successfully parsed AI prediction after removing pipe chars")
        return data
    except Exception:
        pass

    # ninth fallback - try to decode the response text with different encodings. GPT-5 can return text that is not utf-8 encoded.
    encodings_to_try = ['latin-1', 'utf-16']
    for encoding in encodings_to_try:
        try:
            data = yaml.safe_load(response_text.encode(encoding).decode("utf-8"))
            if data:
                get_logger().info(f"Successfully parsed AI prediction after decoding with {encoding} encoding")
                return data
        except Exception:
            pass

    # # sixth fallback - try to remove last lines
    # for i in range(1, len(response_text_lines)):
    #     response_text_lines_tmp = '\n'.join(response_text_lines[:-i])
    #     try:
    #         data = yaml.safe_load(response_text_lines_tmp)
    #         get_logger().info(f"Successfully parsed AI prediction after removing {i} lines")
    #         return data
    #     except:
    #         pass

    # every fallback failed
    return None


def set_custom_labels(variables, git_provider=None):
    """
    Populate the prompt `variables` with label definitions.

    Does nothing unless `config.enable_custom_labels` is set. When no `custom_labels` are
    configured, `variables["custom_labels"]` receives an indented bullet list of the default
    labels. Otherwise `variables["custom_labels_class"]` receives the source of a `Label`
    enum (one member per configured label, valued with its description) and
    `variables["labels_minimal_to_labels_dict"]` maps each member name back to the original
    label name.

    Args:
        variables: Mutable mapping that is updated in place.
        git_provider: Not used by this function.
    """
    if not get_settings().config.enable_custom_labels:
        return

    labels = get_settings().get('custom_labels', {})
    if labels:
        # User-defined labels: emit an enum-like class body, one member per label.
        variables["custom_labels_class"] = "class Label(str, Enum):"
        member_to_label = {}
        for label_name, label_settings in labels.items():
            # keep the description on a single line by escaping inner newlines
            escaped_description = label_settings['description'].strip('\n').replace('\n', '\\n')
            member_name = label_name.lower().replace(' ', '_')
            variables["custom_labels_class"] += f"\n    {member_name} = '{escaped_description}'"
            member_to_label[member_name] = label_name
        variables["labels_minimal_to_labels_dict"] = member_to_label
        return

    # No custom labels configured: fall back to the default set, rendered as a bullet list.
    default_labels = ['Bug fix', 'Tests', 'Bug fix with tests', 'Enhancement', 'Documentation', 'Other']
    bullet_separator = "\n      - "
    variables["custom_labels"] = f"      - {bullet_separator.join(default_labels)}"

def get_user_labels(current_labels: List[str] = None):
    """
    Return the subset of `current_labels` that is considered user-added.

    A label is dropped when its lower-cased form is one of the built-in label names, or when
    custom labels are enabled and the label appears in the configured `custom_labels`.
    If anything raises while filtering, the exception is logged and `current_labels` is
    returned as-is.
    """
    try:
        enable_custom_labels = get_settings().config.get('enable_custom_labels', False)
        custom_labels = get_settings().get('custom_labels', [])
        if current_labels is None:
            current_labels = []

        builtin_labels = ['bug fix', 'tests', 'enhancement', 'documentation', 'other']
        user_labels = [
            label for label in current_labels
            if label.lower() not in builtin_labels
            and not (enable_custom_labels and label in custom_labels)
        ]
        if user_labels:
            get_logger().debug(f"Keeping user labels: {user_labels}")
    except Exception as e:
        get_logger().exception(f"Failed to get user labels: {e}")
        return current_labels
    return user_labels


def get_max_tokens(model):
    """
    Resolve the token budget for `model`.

    Resolution order:
    (1) the entry for the model in MAX_TOKENS ('./pr_agent/algo/__init__.py'), if present;
    (2) otherwise a positive 'config.custom_model_max_tokens'.
    If neither is available an error is logged and an Exception is raised.

    The resolved value is finally capped by 'config.max_model_tokens' when that setting is
    set to a positive number. The cap exists because model quality degrades when the input
    gets too long.
    """
    settings = get_settings()

    if model in MAX_TOKENS:
        model_limit = MAX_TOKENS[model]
    else:
        custom_limit = settings.config.custom_model_max_tokens
        if custom_limit > 0:
            model_limit = custom_limit
        else:
            get_logger().error(f"Model {model} is not defined in MAX_TOKENS in ./pr_agent/algo/__init__.py and no custom_model_max_tokens is set")
            raise Exception(f"Ensure {model} is defined in MAX_TOKENS in ./pr_agent/algo/__init__.py or set a positive value for it in config.custom_model_max_tokens")

    global_cap = settings.config.max_model_tokens
    if global_cap and global_cap > 0:
        model_limit = min(global_cap, model_limit)
    return model_limit


def clip_tokens(text: str, max_tokens: int, add_three_dots=True, num_input_tokens=None, delete_last_line=False) -> str:
    """
    Truncate `text` so that it fits (approximately) into `max_tokens` tokens.

    The cut is made on characters, not tokens: the average characters-per-token ratio of the
    input is computed, multiplied by `max_tokens`, and reduced by a 0.9 safety factor.

    Args:
        text (str): The string to clip. Falsy input (empty string / None) is returned unchanged.
        max_tokens (int): Token budget for the result.
        add_three_dots (bool, optional): Append a newline followed by "...(truncated)" to a
            clipped, non-empty result. Defaults to True.
        num_input_tokens (int, optional): Pre-computed token count of `text`. When None, the
            count is obtained from `TokenEncoder.get_token_encoder()`. Defaults to None.
        delete_last_line (bool, optional): After cutting, drop everything from the last newline
            onward so the result ends on a line boundary. Defaults to False.

    Returns:
        str: - `text` unchanged if it is falsy, if its token count is within `max_tokens`,
               or if an exception occurs (a warning is logged in that case);
             - "" if clipping is needed and `max_tokens` is negative, or if the computed
               number of characters to keep is not positive (e.g. `max_tokens == 0`);
             - otherwise the clipped text, with the truncation marker if requested.

    Example:
        With a 100-character text counted as 40 tokens and max_tokens=10, the ratio is
        2.5 chars/token, so int(0.9 * 2.5 * 10) = 22 characters are kept.
    """
    if not text:
        return text

    try:
        if num_input_tokens is None:
            num_input_tokens = len(TokenEncoder.get_token_encoder().encode(text))

        # nothing to do when the text already fits
        if num_input_tokens <= max_tokens:
            return text
        if max_tokens < 0:
            return ""

        # translate the token budget into a character budget
        chars_per_token = len(text) / num_input_tokens
        factor = 0.9  # reduce by 10% to be safe
        chars_to_keep = int(factor * chars_per_token * max_tokens)
        if chars_to_keep <= 0:
            return ""

        clipped = text[:chars_to_keep]
        if delete_last_line:
            clipped = clipped.rsplit('\n', 1)[0]
        if add_three_dots:
            clipped = clipped + "\n...(truncated)"
        return clipped
    except Exception as e:
        get_logger().warning(f"Failed to clip tokens: {e}")
        return text

def replace_code_tags(text):
    """
    HTML-escape `text` and turn backtick-delimited spans into <code>...</code> elements.

    The escaped text is split on backticks; every odd-indexed segment (the text that follows
    an opening backtick) is wrapped in <code> tags, and the backticks themselves are removed.
    """
    segments = html.escape(text).split('`')
    return ''.join(
        f'<code>{segment}</code>' if index % 2 else segment
        for index, segment in enumerate(segments)
    )


def _iter_patch_lines_with_new_line_numbers(patch_lines, re_hunk_header):
    """
    Walk the lines of a unified-diff patch and yield `(index, line, new_file_line_number)`.

    `new_file_line_number` is computed as `hunk_new_start + non_removed_lines_seen - 1`, so a
    hunk header itself maps to `hunk_new_start - 1` and a removed ('-') line shares the number
    of the line before it. Lines that belong to a hunk whose header cannot be parsed are
    skipped, because no reliable numbering exists for them.
    """
    new_start = 0  # first line of the current hunk in the new file
    delta = 0      # non-removed lines seen since the current hunk header
    for index, line in enumerate(patch_lines):
        if line.startswith('@@'):
            delta = 0
            match = re_hunk_header.match(line)
            # Only the new-file start is needed. The size fields are optional in the unified
            # format ("@@ -1 +1 @@"), so they must not be converted to int unconditionally.
            new_start = int(match.group(3)) if match else None
        elif not line.startswith('-'):
            delta += 1
        if new_start is None:
            continue
        yield index, line, new_start + delta - 1


def find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo],
                                              relevant_file: str,
                                              relevant_line_in_file: str,
                                              absolute_position: int = None) -> Tuple[int, int]:
    """
    Locate a line inside the patch of `relevant_file`.

    Two modes are supported:
    - `absolute_position` given: find the patch line whose new-file line number equals it.
    - otherwise: find the first non-removed patch line containing `relevant_line_in_file`.
      A single close match (difflib, cutoff 0.93) that is an added line replaces the search
      text first, and if nothing is found and the text starts with '+', the search is repeated
      without that '+' (the model may prepend '+' even to context lines).

    Args:
        diff_files: Files of the diff; each needs `filename` and `patch` attributes.
        relevant_file: File name to look for (compared against the stripped `filename`).
        relevant_line_in_file: Text of the line to find (used only in text-search mode).
        absolute_position: Line number in the new file, or None/-1 for text-search mode.

    Returns:
        (position, absolute_position): the 0-based index of the line within the patch lines and
        its line number in the new file. `position` is -1 when nothing was found.
    """
    position = -1
    if absolute_position is None:
        absolute_position = -1
    re_hunk_header = re.compile(
        r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")

    if not diff_files:
        return position, absolute_position

    for file in diff_files:
        if not (file.filename and file.filename.strip() == relevant_file):
            continue
        # a file without patch content simply has no lines to search
        patch_lines = (file.patch or "").splitlines()

        if absolute_position != -1:  # matching absolute to relative
            for index, _line, new_line_number in _iter_patch_lines_with_new_line_numbers(patch_lines,
                                                                                         re_hunk_header):
                if new_line_number == absolute_position:
                    position = index
                    break
            continue

        # text-search mode: an empty needle would match every line, so there is nothing to find
        if not relevant_line_in_file:
            continue

        # try to find the line in the patch using difflib, with some margin of error
        matches_difflib = difflib.get_close_matches(relevant_line_in_file, patch_lines, n=3, cutoff=0.93)
        if len(matches_difflib) == 1 and matches_difflib[0].startswith('+'):
            relevant_line_in_file = matches_difflib[0]

        needles = [relevant_line_in_file]
        if relevant_line_in_file.startswith('+'):
            # The model might add a '+' to the beginning of the relevant_line_in_file even if
            # originally it's a context line
            no_plus_line = relevant_line_in_file[1:].lstrip()
            if no_plus_line:
                needles.append(no_plus_line)

        for needle in needles:
            for index, line, new_line_number in _iter_patch_lines_with_new_line_numbers(patch_lines,
                                                                                        re_hunk_header):
                if needle in line and not line.startswith('-'):
                    position = index
                    absolute_position = new_line_number
                    break
            if position != -1:
                break
    return position, absolute_position

def get_rate_limit_status(github_token) -> dict:
    """
    Query the GitHub `/rate_limit` endpoint and return the decoded JSON body.

    The API base URL is read from the "GITHUB.BASE_URL" setting (default
    "https://api.github.com"). If the server answers 'Rate limiting is not enabled.'
    (GitHub Enterprise), `{'resources': {}}` is returned. If decoding the body or the HTTP
    status check fails, the request is retried once after a short pause and the JSON of the
    second response is returned without further checks.
    """
    base_url = get_settings(use_context=False).get("GITHUB.BASE_URL", "https://api.github.com").rstrip("/")  # "https://api.github.com"
    rate_limit_url = f"{base_url}/rate_limit"
    request_headers = {
        "Accept": "application/vnd.github.v3+json",
        "Authorization": f"token {github_token}"
    }

    def _request_rate_limit():
        return requests.get(rate_limit_url, headers=request_headers)

    response = _request_rate_limit()
    try:
        rate_limit_info = response.json()
        if rate_limit_info.get('message') == 'Rate limiting is not enabled.':  # for github enterprise
            return {'resources': {}}
        response.raise_for_status()  # Check for HTTP errors
        return rate_limit_info
    except:  # retry
        time.sleep(0.1)
        return _request_rate_limit().json()


def validate_rate_limit_github(github_token, installation_id=None, threshold=0.1) -> bool:
    """
    Check that every GitHub rate-limit resource still has enough remaining quota.

    Args:
        github_token: Token used to query the rate-limit status.
        installation_id: Optional id; when given, the overall 'rate' entry is logged at debug level.
        threshold: Minimal fraction of the limit that must remain for each resource.

    Returns:
        False if some resource has less than `limit * threshold` requests remaining,
        True otherwise. Errors while checking are logged and also result in True.
    """
    try:
        rate_limit_status = get_rate_limit_status(github_token)
        if installation_id:
            # 'rate' is absent from the {'resources': {}} answer that get_rate_limit_status
            # returns for GitHub Enterprise without rate limiting; indexing it would raise
            # KeyError and log a spurious error through the handler below.
            get_logger().debug(f"installation_id: {installation_id}, Rate limit status: {rate_limit_status.get('rate')}")
        # validate that the rate limit is not exceeded
        for key, value in rate_limit_status['resources'].items():
            if value['remaining'] < value['limit'] * threshold:
                get_logger().error(f"key: {key}, value: {value}")
                return False
        return True
    except Exception as e:
        get_logger().error(f"Error in rate limit {e}",
                           artifact={"traceback": traceback.format_exc()})
        return True


def validate_and_await_rate_limit(github_token):
    """
    Block until the GitHub rate limit has recovered, if it is close to exhaustion.

    For every resource in the rate-limit status, if fewer than `limit // 80` requests remain,
    the function sleeps until the resource's 'reset' timestamp (plus one second) and then
    refreshes the status.

    Returns:
        The most recently fetched rate-limit status, or None if an error occurred.
    """
    try:
        rate_limit_status = get_rate_limit_status(github_token)
        # validate that the rate limit is not exceeded
        # (rebinding rate_limit_status below does not affect this iteration, which keeps
        # walking the resources of the status fetched first)
        for key, value in rate_limit_status['resources'].items():
            if value['remaining'] < value['limit'] // 80:
                get_logger().error(f"key: {key}, value: {value}")
                sleep_time_sec = value['reset'] - datetime.now().timestamp()
                if sleep_time_sec > 0:
                    sleep_time_hour = sleep_time_sec / 3600.0
                    get_logger().error(f"Rate limit exceeded. Sleeping for {sleep_time_hour} hours")
                    time.sleep(sleep_time_sec + 1)
                rate_limit_status = get_rate_limit_status(github_token)
        return rate_limit_status
    except Exception as e:
        # `except Exception` (not a bare except) so that KeyboardInterrupt / SystemExit raised
        # during the potentially long sleep still propagate to the caller.
        get_logger().error(f"Error in rate limit {e}")
        return None


def github_action_output(output_data: dict, key_name: str):
    """
    Append `key_name=<json>` to the file named by the GITHUB_OUTPUT environment variable.

    The JSON value is `output_data[key_name]` (an empty dict when the key is missing).
    Nothing is written unless the 'github_action_config.enable_output' setting is truthy.
    Any failure is logged as an error and otherwise ignored.
    """
    try:
        if get_settings().get('github_action_config.enable_output', False):
            key_data = output_data.get(key_name, {})
            with open(os.environ['GITHUB_OUTPUT'], 'a') as output_file:
                serialized = json.dumps(key_data, indent=None, ensure_ascii=False)
                print(f"{key_name}={serialized}", file=output_file)
    except Exception as e:
        get_logger().error(f"Failed to write to GitHub Action output: {e}")
    return


def show_relevant_configurations(relevant_section: str) -> str:
    """
    Render the current configuration as a collapsible markdown block.

    Two fenced sections are produced: the global `[config]` section and the section named
    by `relevant_section`. Keys listed in the built-in skip list (plus any extra skip keys
    from the settings) are omitted from both.

    Args:
        relevant_section: Name of the tool-specific settings section to show.

    Returns:
        The markdown/HTML text of the `<details>` block.
    """
    skip_keys = ['ai_disclaimer', 'ai_disclaimer_title', 'ANALYTICS_FOLDER', 'secret_provider', "skip_keys", "app_id", "redirect",
                      'trial_prefix_message', 'no_eligible_message', 'identity_provider', 'ALLOWED_REPOS','APP_NAME']
    # NOTE(review): this looks up the dotted key 'config.skip_keys' *inside* the config section;
    # presumably 'skip_keys' was intended - confirm against the settings schema.
    extra_skip_keys = get_settings().config.get('config.skip_keys', [])
    if extra_skip_keys:
        skip_keys.extend(extra_skip_keys)

    markdown_text = ""
    markdown_text += "\n<hr>\n<details> <summary><strong>🛠️ Relevant configurations:</strong></summary> \n\n"
    markdown_text +="<br>These are the relevant [configurations](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml) for this tool:\n\n"
    # bold markers must wrap the whole "[config]" header, same as the section header below
    markdown_text += f"**[config]**\n```yaml\n\n"
    for key, value in get_settings().config.items():
        if key in skip_keys:
            continue
        markdown_text += f"{key}: {value}\n"
    markdown_text += "\n```\n"
    markdown_text += f"\n**[{relevant_section}]**\n```yaml\n\n"
    for key, value in get_settings().get(relevant_section, {}).items():
        if key in skip_keys:
            continue
        markdown_text += f"{key}: {value}\n"
    markdown_text += "\n```"
    markdown_text += "\n</details>\n"
    return markdown_text

def is_value_no(value):
    """
    Tell whether `value` represents a negative / absent answer.

    Returns True for any falsy value and for values whose string form, stripped and
    lower-cased, is 'no', 'none' or 'false'. Returns False otherwise.
    """
    if not value:
        return True
    return str(value).strip().lower() in ('no', 'none', 'false')


def set_pr_string(repo_name, pr_number):
    """Build the short PR reference '<repo_name>#<pr_number>'."""
    return "{}#{}".format(repo_name, pr_number)


def string_to_uniform_number(s: str) -> float:
    """
    Map a string deterministically to a float in the range [0, 1].

    The string is hashed with SHA-256, the digest is read as one big integer and divided by
    the largest possible digest value (2**256 - 1). The spread over [0, 1] relies on the
    SHA-256 output being uniformly distributed over its output space.
    """
    digest_hex = hashlib.sha256(s.encode()).hexdigest()
    digest_as_int = int(digest_hex, 16)
    largest_digest = 2 ** 256 - 1
    return float(digest_as_int) / largest_digest


def process_description(description_full: str) -> Tuple[str, List]:
    """
    Split a PR description into its base text and the per-file entries of the file walkthrough.

    The description is split at the file-walkthrough header
    (`PRDescriptionHeader.FILE_WALKTHROUGH.value`). Everything before it is the base
    description; the part after it is scanned for `<tr><td><details>...</details></td>`
    table cells, and each cell is parsed into a file entry.

    Args:
        description_full: The complete PR description text (may be empty/None).

    Returns:
        A tuple `(base_description, files)` where `files` is a list of dicts with the keys
        'short_file_name', 'full_file_name', 'short_summary' and 'long_summary'.
        `files` is empty when there is no walkthrough or it could not be parsed.
        Parsing errors are logged and do not propagate.
    """
    if not description_full:
        return "", []

    # description_split = description_full.split(PRDescriptionHeader.FILE_WALKTHROUGH.value)
    if PRDescriptionHeader.FILE_WALKTHROUGH.value in description_full:
        try:
            # FILE_WALKTHROUGH are presented in a collapsible section in the description
            regex_pattern = r'<details.*?>\s*<summary>\s*<h3>\s*' + re.escape(PRDescriptionHeader.FILE_WALKTHROUGH.value) + r'\s*</h3>\s*</summary>'
            description_split = re.split(regex_pattern, description_full, maxsplit=1, flags=re.DOTALL)

            # If the regex pattern is not found, fallback to the previous method
            if len(description_split) == 1:
                get_logger().debug("Could not find regex pattern for file walkthrough, falling back to simple split")
                description_split = description_full.split(PRDescriptionHeader.FILE_WALKTHROUGH.value, 1)
        except Exception as e:
            get_logger().warning(f"Failed to split description using regex, falling back to simple split: {e}")
            description_split = description_full.split(PRDescriptionHeader.FILE_WALKTHROUGH.value, 1)

        # without two parts there is no walkthrough section to parse
        if len(description_split) < 2:
            get_logger().error("Failed to split description into base and changes walkthrough", artifact={'description': description_full})
            return description_full.strip(), []

        base_description_str = description_split[0].strip()
        changes_walkthrough_str = ""
        files = []
        # NOTE(review): the else branch below cannot be reached, since the
        # `len(description_split) < 2` case has already returned above.
        if len(description_split) > 1:
            changes_walkthrough_str = description_split[1]
        else:
            get_logger().debug("No changes walkthrough found")
    else:
        # no walkthrough header at all: the whole description is the base description
        base_description_str = description_full.strip()
        return base_description_str, []

    try:
        if changes_walkthrough_str:
            # get the end of the table
            if '</table>\n\n___' in changes_walkthrough_str:
                end = changes_walkthrough_str.index("</table>\n\n___")
            elif '\n___' in changes_walkthrough_str:
                end = changes_walkthrough_str.index("\n___")
            else:
                end = len(changes_walkthrough_str)
            changes_walkthrough_str = changes_walkthrough_str[:end]

            h = html2text.HTML2Text()
            h.body_width = 0  # Disable line wrapping

            # find all the files
            pattern = r'<tr>\s*<td>\s*(<details>\s*<summary>(.*?)</summary>(.*?)</details>)\s*</td>'
            files_found = re.findall(pattern, changes_walkthrough_str, re.DOTALL)
            for file_data in files_found:
                try:
                    # findall returns one tuple per match (the pattern has several groups);
                    # the first group is the whole <details> element
                    if isinstance(file_data, tuple):
                        file_data = file_data[0]
                    # primary layout: the long summary starts at a '<li>' or a bullet character
                    pattern = r'<details>\s*<summary><strong>(.*?)</strong>\s*<dd><code>(.*?)</code>.*?</summary>\s*<hr>\s*(.*?)\s*(?:<li>|•)(.*?)</details>'
                    res = re.search(pattern, file_data, re.DOTALL)
                    if not res or res.lastindex != 4:
                        # fallback layout: file name and long summary separated by a blank line
                        pattern_back = r'<details>\s*<summary><strong>(.*?)</strong><dd><code>(.*?)</code>.*?</summary>\s*<hr>\s*(.*?)\n\n\s*(.*?)</details>'
                        res = re.search(pattern_back, file_data, re.DOTALL)
                    if not res or res.lastindex != 4:
                        pattern_back = r'<details>\s*<summary><strong>(.*?)</strong>\s*<dd><code>(.*?)</code>.*?</summary>\s*<hr>\s*(.*?)\s*-\s*(.*?)\s*</details>' # looking for hyphen ('- ')
                        res = re.search(pattern_back, file_data, re.DOTALL)
                    if res and res.lastindex == 4:
                        # groups: 1 = short file name, 2 = short summary, 3 = full file name, 4 = long summary
                        short_filename = res.group(1).strip()
                        short_summary = res.group(2).strip()
                        long_filename = res.group(3).strip()
                        if long_filename.endswith('<ul>'):
                            long_filename = long_filename[:-4].strip()
                        long_summary =  res.group(4).strip()
                        # normalize line breaks, then convert the HTML fragment to markdown text
                        long_summary = long_summary.replace('<br> *', '\n*').replace('<br>','').replace('\n','<br>')
                        long_summary = h.handle(long_summary).strip()
                        # make sure the summary starts with a markdown bullet ('* ')
                        if long_summary.startswith('\\-'):
                            long_summary = "* " + long_summary[2:]
                        elif not long_summary.startswith('*'):
                            long_summary = f"* {long_summary}"

                        files.append({
                            'short_file_name': short_filename,
                            'full_file_name': long_filename,
                            'short_summary': short_summary,
                            'long_summary': long_summary
                        })
                    else:
                        if '<code>...</code>' in file_data:
                            pass # PR with many files. some did not get analyzed
                        else:
                            get_logger().warning(f"Failed to parse description", artifact={'description': file_data})
                except Exception as e:
                    get_logger().exception(f"Failed to process description: {e}", artifact={'description': file_data})


    except Exception as e:
        get_logger().exception(f"Failed to process description: {e}")

    return base_description_str, files

def get_version() -> str:
    """
    Determine the version string of pr-agent.

    A `pyproject.toml` in the current working directory is consulted first (this requires
    Python 3.11+ for `tomllib`); its `project.version` value is returned when present.
    Otherwise the version of the installed 'pr-agent' package is returned, or "unknown"
    when no such package is installed.
    """
    pyproject_file = "pyproject.toml"

    # First check pyproject.toml if running directly out of repository
    if os.path.exists(pyproject_file):
        if sys.version_info < (3, 11):
            get_logger().warning("Unable to determine local version from pyproject.toml")
        else:
            import tomllib
            with open(pyproject_file, "rb") as f:
                data = tomllib.load(f)
                has_version = "project" in data and "version" in data["project"]
                if has_version:
                    return data["project"]["version"]
                get_logger().warning("Version not found in pyproject.toml")

    # Otherwise get the installed pip package version
    try:
        return version('pr-agent')
    except PackageNotFoundError:
        get_logger().warning("Unable to find package named 'pr-agent'")
        return "unknown"


def set_file_languages(diff_files) -> List[FilePatchInfo]:
    """
    Set the `language` attribute of each diff file based on its file-name extension.

    The extension-to-language mapping is built from the `language_extension_map_org` setting
    (language -> list of extensions). Files whose extension is not in the mapping get "txt".
    If the first file already has a non-empty `language`, the list is returned untouched.

    Args:
        diff_files: The files to annotate; each needs a `filename` attribute.

    Returns:
        The same `diff_files` object. Errors are logged and do not propagate.
    """
    # Nothing to annotate. Without this guard, `diff_files[0]` below raises for an empty list
    # (or None) and a perfectly normal situation gets logged as an exception with traceback.
    if not diff_files:
        return diff_files

    try:
        # if the language is already set, do not change it
        if hasattr(diff_files[0], 'language') and diff_files[0].language:
            return diff_files

        # map file extensions to programming languages
        language_extension_map_org = get_settings().language_extension_map_org
        extension_to_language = {}
        for language, extensions in language_extension_map_org.items():
            for ext in extensions:
                extension_to_language[ext] = language
        for file in diff_files:
            # text after the last '.', with the dot re-attached (the whole name if there is no dot)
            extension_s = '.' + file.filename.rsplit('.', 1)[-1]
            language_name = extension_to_language.get(extension_s, "txt")
            file.language = language_name.lower()
    except Exception as e:
        get_logger().exception(f"Failed to set file languages: {e}")

    return diff_files

def format_todo_item(todo_item: TodoItem, git_provider, gfm_supported) -> str:
    """
    Render a single TODO entry as ``<file> [<line>]``, optionally followed by ``: <content>``.

    When the git provider returns a link for the line, the file reference becomes an HTML anchor
    (``gfm_supported`` truthy) or a markdown link (otherwise).

    Args:
        todo_item: mapping that may hold 'relevant_file', 'line_number' and 'content'.
        git_provider: object exposing ``get_line_link(file, start_line, end_line)``.
        gfm_supported: selects HTML anchor vs. markdown link syntax.

    Returns:
        The formatted reference string.
    """
    path = todo_item.get('relevant_file', '').strip()
    line = todo_item.get('line_number', '')
    text = todo_item.get('content', '')
    link = git_provider.get_line_link(path, line, line)

    label = f"{path} [{line}]"
    if link and gfm_supported:
        label = f"<a href='{link}'>{label}</a>"
    elif link:
        label = f"[{label}]({link})"

    # if content is empty, return only the file reference
    return f"{label}: {text.strip()}" if text else label


def format_todo_items(value: list[TodoItem] | TodoItem, git_provider, gfm_supported) -> str:
    markdown_text = ""
    MAX_ITEMS = 5 # limit the number of items to display
    if gfm_supported:
        if isinstance(value, list):
            markdown_text += "<ul>\n"
            if len(value) > MAX_ITEMS:
                get_logger().debug(f"Truncating todo items to {MAX_ITEMS} items")
                value = value[:MAX_ITEMS]
            for todo_item in value:
                markdown_text += f"<li>{format_todo_item(todo_item, git_provider, gfm_supported)}</li>\n"
            markdown_text += "</ul>\n"
        else:
            markdown_text += f"<p>{format_todo_item(value, git_provider, gfm_supported)}</p>\n"
    else:
        if isinstance(value, list):
            if len(value) > MAX_ITEMS:
                get_logger().debug(f"Truncating todo items to {MAX_ITEMS} items")
                value = value[:MAX_ITEMS]
            for todo_item in value:
                markdown_text += f"- {format_todo_item(todo_item, git_provider, gfm_supported)}\n"
        else:
            markdown_text += f"- {format_todo_item(value, git_provider, gfm_supported)}\n"
    return markdown_text


================================================
FILE: pr_agent/cli.py
================================================
import argparse
import asyncio
import os

from pr_agent.agent.pr_agent import PRAgent, commands
from pr_agent.algo.utils import get_version
from pr_agent.config_loader import get_settings
from pr_agent.log import get_logger, setup_logger

# Configure logging once, at import time. The level is read from the LOG_LEVEL environment
# variable and falls back to "INFO" when the variable is not set.
log_level = os.environ.get("LOG_LEVEL", "INFO")
setup_logger(log_level)


def set_parser():
    """
    Build the command-line parser of the pr-agent CLI.

    The parser accepts ``--pr_url`` / ``--issue_url``, a mandatory ``command`` (restricted to the
    names in ``commands``) and collects every remaining token in ``rest`` so that it can be
    forwarded to the selected tool unchanged.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser(description='AI based pull request analyzer', usage=
    """\
    Usage: cli.py --pr_url=<URL on supported git hosting service> <command> [<args>].
    For example:
    - cli.py --pr_url=... review
    - cli.py --pr_url=... describe
    - cli.py --pr_url=... improve
    - cli.py --pr_url=... ask "write me a poem about this PR"
    - cli.py --pr_url=... reflect
    - cli.py --issue_url=... similar_issue
    - cli.py --pr_url/--issue_url= help_docs [<asked question>]

    Supported commands:
    - review / review_pr - Add a review that includes a summary of the PR and specific suggestions for improvement.

    - ask / ask_question [question] - Ask a question about the PR.

    - describe / describe_pr - Modify the PR title and description based on the PR's contents.

    - improve / improve_code - Suggest improvements to the code in the PR as pull request comments ready to commit.
    Extended mode ('improve --extended') employs several calls, and provides a more thorough feedback

    - reflect - Ask the PR author questions about the PR.

    - update_changelog - Update the changelog based on the PR's contents.

    - add_docs

    - generate_labels

    - help_docs - Ask a question, from either an issue or PR context, on a given repo (current context or a different one)


    Configuration:
    To edit any configuration parameter from 'configuration.toml', just add --config_path=<value>.
    For example: 'python cli.py --pr_url=... review --pr_reviewer.extra_instructions="focus on the file: ..."'
    """)
    parser.add_argument('--version', action='version', version=f'pr-agent {get_version()}')
    parser.add_argument('--pr_url', type=str, help='The URL of the PR to review', default=None)
    parser.add_argument('--issue_url', type=str, help='The URL of the Issue to review', default=None)
    # NOTE: 'command' is a required positional, so argparse never applies the default below;
    # it is kept as-is to leave the parser's interface untouched.
    parser.add_argument('command', type=str, help='The command to run', choices=commands, default='review')
    # Everything after the command is passed through verbatim to the tool
    parser.add_argument('rest', nargs=argparse.REMAINDER, default=[])
    return parser


def run_command(pr_url, command):
    """
    Run a single pr-agent command against a PR from Python code.

    Args:
        pr_url: URL of the pull request.
        command: command text such as '/review'; leading slashes are removed and the text is
            split on whitespace into CLI tokens.
    """
    # Preparing the command: an argv-like token list, the PR URL option first
    argv = f"--pr_url={pr_url} {command.lstrip('/')}".split()
    parsed_args = set_parser().parse_args(argv)

    # Run the command. Feedback will appear in GitHub PR comments
    run(args=parsed_args)


def run(inargs=None, args=None):
    """
    CLI entry point: parse the arguments (unless given) and execute the requested command.

    Args:
        inargs: optional argv list handed to the parser when ``args`` is not supplied.
        args: optional, already parsed argument namespace.

    The help text is printed when neither a PR URL nor an issue URL is available, and also when
    the handled request returns a falsy result.
    """
    parser = set_parser()
    if not args:
        args = parser.parse_args(inargs)
    if not (args.pr_url or args.issue_url):
        parser.print_help()
        return

    command = args.command.lower()
    get_settings().set("CONFIG.CLI_MODE", True)

    async def inner():
        # An issue URL, when present, takes precedence over the PR URL
        target_url = args.issue_url if args.issue_url else args.pr_url
        result = await asyncio.create_task(PRAgent().handle_request(target_url, [command] + args.rest))

        if get_settings().litellm.get("enable_callbacks", False):
            # There may be additional events on the event queue from the run above. If there are give them time to complete.
            get_logger().debug("Waiting for event queue to complete")
            leftover = [task for task in asyncio.all_tasks() if task is not asyncio.current_task()]
            if leftover:
                _, pending = await asyncio.wait(leftover, timeout=30)
                if pending:
                    get_logger().warning(
                        f"{len(pending)} callback tasks({[task.get_coro() for task in pending]}) did not complete within timeout"
                    )

        return result

    outcome = asyncio.run(inner())
    if not outcome:
        parser.print_help()


# Script entry point: parse the process arguments and run the requested command.
if __name__ == '__main__':
    run()


================================================
FILE: pr_agent/cli_pip.py
================================================
from pr_agent import cli
from pr_agent.config_loader import get_settings


def main():
    """
    Template for running pr-agent from Python after a pip install.

    Edit the placeholder values below, then run this module: the values are written into the
    settings and the chosen command is executed on the given PR.
    """
    # Fill in the following values
    provider = "github"  # GitHub provider
    user_token = "..."  # GitHub user token
    openai_key = "..."  # OpenAI key
    pr_url = "..."  # PR URL, for example 'https://github.com/Codium-ai/pr-agent/pull/809'
    command = "/review"  # Command to run (e.g. '/review', '/describe', '/ask="What is the purpose of this PR?"')

    # Setting the configurations
    overrides = (
        ("CONFIG.git_provider", provider),
        ("openai.key", openai_key),
        ("github.user_token", user_token),
    )
    for setting_name, setting_value in overrides:
        get_settings().set(setting_name, setting_value)

    # Run the command. Feedback will appear in GitHub PR comments
    cli.run_command(pr_url, command)


# Script entry point: run the template above with the values filled in by the user.
if __name__ == '__main__':
    main()


================================================
FILE: pr_agent/config_loader.py
================================================
from os.path import abspath, dirname, join
from pathlib import Path
from typing import Optional

from dynaconf import Dynaconf
from starlette_context import context

# Name of the pyproject.toml tool table ([tool.pr-agent]) that is loaded further below in this module.
PR_AGENT_TOML_KEY = 'pr-agent'

# Directory of this module; every bundled settings file below is addressed relative to it.
current_dir = dirname(abspath(__file__))

# Keyword arguments shared by the Dynaconf instance created below (each entry is explained inline).
dynconf_kwargs = {'core_loaders': [], # DISABLE default loaders, otherwise will load toml files more than once.
                           'loaders': ['pr_agent.custom_merge_loader', 'dynaconf.loaders.env_loader'], # Use a custom loader to merge sections, but overwrite their overlapping values. Also support ENV variables to take precedence.
                           'root_path': join(current_dir, "settings"), #Used for Dynaconf.find_file() - So that root path points to settings folder, since we disabled all core loaders.
                           'merge_enabled': True  # In case more than one file is sent, merge them. Must be set to True, otherwise, a .toml file with section [XYZ] overwrites the entire section of a previous .toml file's [XYZ] and we want it to only overwrite the overlapping fields under such section
                           }
# Process-wide default settings object; get_settings() returns it whenever no request context
# provides its own "settings" entry. The files are listed in load order.
global_settings = Dynaconf(
    envvar_prefix=False,
    load_dotenv=False,  # Security: Don't load .env files
    settings_files=[join(current_dir, f) for f in [
        "settings/configuration.toml",
        "settings/ignore.toml",
        "settings/generated_code_ignore.toml",
        "settings/language_extensions.toml",
        "settings/pr_reviewer_prompts.toml",
        "settings/pr_questions_prompts.toml",
        "settings/pr_line_questions_prompts.toml",
        "settings/pr_description_prompts.toml",
        "settings/code_suggestions/pr_code_suggestions_prompts.toml",
        "settings/code_suggestions/pr_code_suggestions_prompts_not_decoupled.toml",
        "settings/code_suggestions/pr_code_suggestions_reflect_prompts.toml",
        "settings/pr_information_from_user_prompts.toml",
        "settings/pr_update_changelog_prompts.toml",
        "settings/pr_custom_labels.toml",
        "settings/pr_add_docs.toml",
        "settings/custom_labels.toml",
        "settings/pr_help_prompts.toml",
        "settings/pr_help_docs_prompts.toml",
        "settings/pr_help_docs_headings_prompts.toml",
        "settings/.secrets.toml",
        "settings_prod/.secrets.toml",
    ]],
    **dynconf_kwargs
)


def get_settings(use_context=False):
    """
    Return the settings object that applies to the current execution.

    The "settings" entry of the starlette_context ``context`` is preferred. When it cannot be read
    for any reason (for example outside of a request context), the module-level
    ``global_settings`` is returned instead.

    Args:
        use_context: accepted for compatibility; this function does not read it.

    Returns:
        Dynaconf: the context-specific settings if available, otherwise the global default.
    """
    try:
        active_settings = context["settings"]
    except Exception:
        active_settings = global_settings
    return active_settings


# Add local configuration from pyproject.toml of the project being reviewed
def _find_repository_root() -> Optional[Path]:
    """
    Identify the project root by walking up from the current working directory until a directory
    containing a ``.git`` entry is found.

    ``.git`` is accepted both as a directory (regular clone) and as a file: in linked worktrees
    and submodules git stores a ``.git`` *file* that points to the real repository directory.

    Returns:
        The closest ancestor (or the cwd itself) containing ``.git``, or None when there is none.
    """
    start = Path.cwd().resolve()
    # The cwd first, then every parent up to and including the filesystem root
    for candidate in (start, *start.parents):
        if (candidate / ".git").exists():
            return candidate
    return None


def _find_pyproject() -> Optional[Path]:
    """
    Locate ``pyproject.toml`` directly inside the repository root.

    Returns:
        The path of the file, or None when no repository root was found or the root holds no
        such file.
    """
    repo_root = _find_repository_root()
    if not repo_root:
        return None

    candidate = repo_root / "pyproject.toml"
    if candidate.is_file():
        return candidate
    return None


# At import time: when the repository containing the current working directory has a
# pyproject.toml, load it into the active settings using the 'tool.pr-agent' env key.
pyproject_path = _find_pyproject()
if pyproject_path is not None:
    get_settings().load_file(pyproject_path, env=f'tool.{PR_AGENT_TOML_KEY}')


def apply_secrets_manager_config():
    """
    Retrieve configuration from AWS Secrets Manager and override existing settings.

    Best-effort: nothing is applied unless a secret provider is available, it exposes
    ``get_all_secrets`` and ``CONFIG.SECRET_PROVIDER`` equals 'aws_secrets_manager'. Ordinary
    errors are logged and swallowed so that start-up continues; ``KeyboardInterrupt`` and
    ``SystemExit`` are deliberately not swallowed.
    """
    try:
        # Dynamic imports to avoid circular dependency (secret_providers imports config_loader)
        from pr_agent.secret_providers import get_secret_provider
        from pr_agent.log import get_logger

        secret_provider = get_secret_provider()
        if not secret_provider:
            return

        if (hasattr(secret_provider, 'get_all_secrets') and
            get_settings().get("CONFIG.SECRET_PROVIDER") == 'aws_secrets_manager'):
            try:
                secrets = secret_provider.get_all_secrets()
                if secrets:
                    apply_secrets_to_config(secrets)
                    get_logger().info("Applied AWS Secrets Manager configuration")
            except Exception as e:
                get_logger().error(f"Failed to apply AWS Secrets Manager config: {e}")
    except Exception as e:
        try:
            from pr_agent.log import get_logger
            get_logger().debug(f"Secret provider not configured: {e}")
        except Exception:
            # Fail silently if the log module is not available. Only Exception is caught here:
            # a bare except would also hide Ctrl-C and interpreter shutdown.
            pass


def apply_secrets_to_config(secrets: dict):
    """
    Apply a dictionary of secrets to the configuration.

    Only keys of the exact form "<section>.<setting>" are applied; other keys are ignored.
    A value is written only when the setting currently has no value (None or empty string), so
    values that are already configured keep priority.

    Args:
        secrets: mapping of "<section>.<setting>" names to secret values.
    """
    try:
        # Dynamic import to avoid potential circular dependency
        from pr_agent.log import get_logger
    except Exception:
        # Logging is optional here: fall back to a no-op logger when the log module cannot be
        # imported. Only Exception is caught so that Ctrl-C / interpreter exit still propagate.
        def get_logger():
            class DummyLogger:
                def debug(self, msg): pass
            return DummyLogger()

    for key, value in secrets.items():
        if '.' in key:  # nested key like "openai.key"
            parts = key.split('.')
            if len(parts) == 2:
                section, setting = parts
                settings_key = f"{section.upper()}.{setting.upper()}"

                # Set only when no existing value (prioritize environment variables)
                current_value = get_settings().get(settings_key)
                if current_value is None or current_value == "":
                    get_settings().set(settings_key, value)
                    get_logger().debug(f"Set {section}.{setting} from AWS Secrets Manager")


================================================
FILE: pr_agent/custom_merge_loader.py
================================================
from pathlib import Path
import tomllib #tomllib should be used instead of Py toml for Python 3.11+

from jinja2.exceptions import SecurityError

from pr_agent.log import get_logger

def load(obj, env=None, silent=True, key=None, filename=None):
    """
    Load and merge TOML configuration files into a Dynaconf settings object using a secure, in-house loader.
    This loader:
    - Replaces list and dict fields instead of appending/updating (non-default Dynaconf behavior).
    - Enforces several security checks (e.g., disallows includes/preloads and enforces .toml files).
    - Supports optional single-key loading.
    - Supports Dynaconf's fresh_vars feature for dynamic reloading.
    Args:
        obj: The Dynaconf settings instance to update.
        env: The current environment name (upper case). Defaults to 'DEVELOPMENT'. Note: currently unused.
        silent (bool): If True, suppress exceptions and log warnings/errors instead.
        key (str | None): Load only this top-level key (section) if provided; otherwise, load all keys from the files.
        filename (str | None): Custom filename for tests (not used when settings_files are provided).
    Returns:
        None
    """

    MAX_TOML_SIZE_IN_BYTES = 100 * 1024 * 1024 # Prevent out of mem. exceptions by limiting to 100 MBs which is sufficient for upto 1M lines

    # Get the list of files to load
    # TODO: hasattr(obj, 'settings_files') for some reason returns False. Need to use 'settings_file'
    settings_files = obj.settings_files if hasattr(obj, 'settings_files') else (
        obj.settings_file) if hasattr(obj, 'settings_file') else []
    if not settings_files or not isinstance(settings_files, list):
        get_logger().warning("No settings files specified, or missing keys "
                             "(tried looking for 'settings_files' or 'settings_file'), or not a list. Skipping loading.",
                             artifact={'toml_obj_attributes_names': dir(obj)})
        return

    # Storage for all loaded data
    accumulated_data = {}

    # Security: Check for forbidden configuration options
    if hasattr(obj, 'includes') and obj.includes:
        if not silent:
            raise SecurityError("Configuration includes forbidden option: 'includes'. Skipping loading.")
        get_logger().error("Configuration includes forbidden option: 'includes'. Skipping loading.")
        return
    if hasattr(obj, 'preload') and obj.preload:
        if not silent:
            raise SecurityError("Configuration includes forbidden option: 'preload'. Skipping loading.")
        get_logger().error("Configuration includes forbidden option: 'preload'. Skipping loading.")
        return

    for settings_file in settings_files:
        try:
            # Load the TOML file
            file_path = Path(settings_file)
            # Security: Only allow .toml files
            if file_path.suffix.lower() != '.toml':
                get_logger().warning(f"Only .toml files are allowed. Skipping: {settings_file}")
                continue

            if not file_path.exists():
                get_logger().warning(f"Settings file not found: {settings_file}. Skipping it.")
                continue

            if file_path.stat().st_size > MAX_TOML_SIZE_IN_BYTES:
                get_logger().warning(f"Settings file too large (> {MAX_TOML_SIZE_IN_BYTES} bytes): {settings_file}. Skipping it.")
                continue

            with open(file_path, 'rb') as f:
                file_data = tomllib.load(f)

            # Handle sections (like [config], [default], etc.)
            if not isinstance(file_data, dict):
                get_logger().warning(f"TOML root is not a table in '{settings_file}'. Skipping.")
                continue

            # Security: Check file contents for forbidden directives
            validate_file_security(file_data, settings_file)

            for section_name, section_data in file_data.items():
                if not isinstance(section_data, dict):
                    get_logger().warning(f"Section '{section_name}' in '{settings_file}' is not a table. Skipping.")
                    continue
                for field, field_value in section_data.items():
                    if section_name not in accumulated_data:
                        accumulated_data[section_name] = {}
                    accumulated_data[section_name][field] = field_value

        except Exception as e:
            if not silent:
                raise e
            get_logger().exception(f"Exception loading settings file: {settings_file}. Skipping.")

    # Update the settings object
    for k, v in accumulated_data.items():
        # For fresh_vars support: key parameter is uppercase, but accumulated_data keys are lowercase
        if key is None or key.upper() == k.upper():
            obj.set(k, v)

def validate_file_security(file_data, filename):
    """
    Reject parsed configuration data that contains security-sensitive Dynaconf directives.

    Every key of ``file_data`` and of all nested tables is compared case-insensitively against a
    deny-list of directive names. Tables nested deeper than MAX_DEPTH levels are rejected too.

    Args:
        file_data: Parsed TOML data representing the configuration contents.
        filename: The name or path of the file being validated (used for error messages).

    Raises:
        SecurityError: If a forbidden directive is found, or if the data is nested too deeply.
    """
    MAX_DEPTH = 50

    # Forbidden key -> reason, grouped by the reason they share
    forbidden_keys_to_reasons = {
        # Include mechanisms - allow loading arbitrary files
        **dict.fromkeys(
            ('dynaconf_include', 'dynaconf_includes', 'includes'),
            'allows including other config files dynamically'),
        # Preload mechanisms - allow loading files before main config
        **dict.fromkeys(
            ('preload', 'preload_for_dynaconf', 'preloads'),
            'allows preloading files with potential code execution'),
        # Merge controls - could be used to manipulate config behavior
        **dict.fromkeys(
            ('dynaconf_merge', 'dynaconf_merge_enabled', 'merge_enabled'),
            'allows manipulating merge behavior'),
        # Loader controls - allow changing how configs are loaded
        **dict.fromkeys(
            ('loaders_for_dynaconf', 'loaders'),
            'allows overriding loaders to execute arbitrary code'),
        **dict.fromkeys(
            ('core_loaders', 'core_loaders_for_dynaconf'),
            'allows overriding core loaders'),
        # Settings module - allows loading Python modules
        'settings_module': 'allows loading Python modules with code execution',
        **dict.fromkeys(
            ('settings_file_for_dynaconf', 'settings_files_for_dynaconf'),
            'could override settings file location'),
        # Environment variable prefix manipulation
        **dict.fromkeys(
            ('envvar_prefix', 'envvar_prefix_for_dynaconf'),
            'allows changing environment variable prefix'),
    }

    def scan_table(table, dotted_prefix, remaining_depth):
        # Depth-first walk; remaining_depth shrinks by one for every level of nesting
        if remaining_depth <= 0:
            raise SecurityError(
                f"Maximum nesting depth exceeded at {dotted_prefix}. "
                f"Possible attempt to cause stack overflow."
            )

        for name, child in table.items():
            dotted_name = f"{dotted_prefix}.{name}" if dotted_prefix else name

            lowered = name.lower()
            if lowered in forbidden_keys_to_reasons:
                raise SecurityError(
                    f"Security error in {filename}: "
                    f"Forbidden directive '{name}' found at {dotted_name}. Reason: {forbidden_keys_to_reasons[lowered]}"
                )

            # Recursively check nested dicts
            if isinstance(child, dict):
                scan_table(child, dotted_name, remaining_depth - 1)

    # Check at the top level and in all sections
    scan_table(file_data, "", MAX_DEPTH)


================================================
FILE: pr_agent/git_providers/__init__.py
================================================
from starlette_context import context

from pr_agent.config_loader import get_settings
from pr_agent.git_providers.azuredevops_provider import AzureDevopsProvider
from pr_agent.git_providers.bitbucket_provider import BitbucketProvider
from pr_agent.git_providers.bitbucket_server_provider import \
    BitbucketServerProvider
from pr_agent.git_providers.codecommit_provider import CodeCommitProvider
from pr_agent.git_providers.gerrit_provider import GerritProvider
from pr_agent.git_providers.git_provider import GitProvider
from pr_agent.git_providers.gitea_provider import GiteaProvider
from pr_agent.git_providers.github_provider import GithubProvider
from pr_agent.git_providers.gitlab_provider import GitLabProvider
from pr_agent.git_providers.local_git_provider import LocalGitProvider
from pr_agent.git_providers.gitea_provider import GiteaProvider

# Registry mapping the value of the `config.git_provider` setting to the provider class that
# implements it. Both lookup functions below resolve provider ids through this table.
_GIT_PROVIDERS = {
    'github': GithubProvider,
    'gitlab': GitLabProvider,
    'bitbucket': BitbucketProvider,
    'bitbucket_server': BitbucketServerProvider,
    'azure': AzureDevopsProvider,
    'codecommit': CodeCommitProvider,
    'local': LocalGitProvider,
    'gerrit': GerritProvider,
    'gitea': GiteaProvider
}


def get_git_provider():
    """
    Return the provider *class* selected by the `config.git_provider` setting.

    Raises:
        ValueError: if the setting is missing, or names a provider that is not registered.
    """
    try:
        provider_id = get_settings().config.git_provider
    except AttributeError as e:
        raise ValueError("git_provider is a required attribute in the configuration file") from e

    if provider_id in _GIT_PROVIDERS:
        return _GIT_PROVIDERS[provider_id]
    raise ValueError(f"Unknown git provider: {provider_id}")


def get_git_provider_with_context(pr_url) -> GitProvider:
    """
    Get a GitProvider instance for the given PR URL. If a GitProvider instance for this URL is
    already stored in the request context, return it instead of creating a new one.

    Outside of a request context (e.g. the CLI) a new provider is created on every call.

    Args:
        pr_url: URL of the pull request the provider should be bound to.

    Returns:
        The cached or newly created provider instance.

    Raises:
        ValueError: if the provider cannot be created (unknown provider id, constructor failure, ...).
    """

    is_context_env = None
    try:
        is_context_env = context.get("settings", None)
    except Exception:
        pass  # we are not in a context environment (CLI)

    if is_context_env:
        # The cache is written as {pr_url: provider} (see below), so it has to be read by the
        # URL itself; looking up the literal key "pr_url" never matched a stored provider.
        cached_provider = (context.get("git_provider") or {}).get(pr_url)
        if cached_provider:
            # possibly check if the git_provider is still valid, or if some reset is needed
            # ...
            return cached_provider

    try:
        provider_id = get_settings().config.git_provider
        if provider_id not in _GIT_PROVIDERS:
            raise ValueError(f"Unknown git provider: {provider_id}")
        git_provider = _GIT_PROVIDERS[provider_id](pr_url)
        if is_context_env:
            context["git_provider"] = {pr_url: git_provider}
        return git_provider
    except Exception as e:
        raise ValueError(f"Failed to get git provider for {pr_url}") from e


================================================
FILE: pr_agent/git_providers/azuredevops_provider.py
================================================
from __future__ import annotations

import os
from typing import Optional, Tuple
from urllib.parse import urlparse

from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo

from ..algo.file_filter import filter_ignored
from ..algo.language_handler import is_valid_file
from ..algo.utils import (PRDescriptionHeader, clip_tokens,
                          find_line_number_of_relevant_line_in_file,
                          load_large_diff)
from ..config_loader import get_settings
from ..log import get_logger
from .git_provider import GitProvider

# Set to False below when the optional Azure DevOps dependencies cannot be imported;
# AzureDevopsProvider.__init__ checks it and raises ImportError in that case.
AZURE_DEVOPS_AVAILABLE = True
# NOTE(review): presumably the default scope requested when authenticating against Azure DevOps
# with Azure credentials - confirm against _get_azure_devops_client.
ADO_APP_CLIENT_DEFAULT_ID = "499b84ac-1321-427f-aa17-267ca6975798/.default"
# Evaluates to 3999. NOTE(review): presumably one below Azure's PR description size limit - confirm.
MAX_PR_DESCRIPTION_AZURE_LENGTH = 4000-1

# The Azure SDK packages are optional: record their absence instead of failing at import time.
try:
    # noinspection PyUnresolvedReferences
    from azure.devops.connection import Connection
    # noinspection PyUnresolvedReferences
    from azure.devops.released.git import (Comment, CommentThread, GitPullRequest, GitVersionDescriptor, GitClient, CommentThreadContext, CommentPosition)
    from azure.devops.released.work_item_tracking import WorkItemTrackingClient
    # noinspection PyUnresolvedReferences
    from azure.identity import DefaultAzureCredential
    from msrest.authentication import BasicAuthentication
except ImportError:
    AZURE_DEVOPS_AVAILABLE = False


class AzureDevopsProvider(GitProvider):

    def __init__(
            self, pr_url: Optional[str] = None, incremental: Optional[bool] = False
    ):
        """
        Create the provider and, when ``pr_url`` is given, bind it to that pull request.

        Args:
            pr_url: optional URL of the pull request; passed to set_pr() when provided.
            incremental: stored on the instance as ``self.incremental``.

        Raises:
            ImportError: if the Azure DevOps dependencies could not be imported.
        """
        if not AZURE_DEVOPS_AVAILABLE:
            raise ImportError(
                "Azure DevOps provider is not available. Please install the required dependencies."
            )

        # Two clients are returned by the helper: one for git operations, one for boards
        self.azure_devops_client, self.azure_devops_board_client = self._get_azure_devops_client()
        # PR-specific state; the slugs, PR number and PR object are filled in by set_pr()
        self.diff_files = None
        self.workspace_slug = None
        self.repo_slug = None
        self.repo = None
        self.pr_num = None
        self.pr = None
        self.temp_comments = []
        self.incremental = incremental
        if pr_url:
            self.set_pr(pr_url)

    def publish_code_suggestions(self, code_suggestions: list) -> bool:
        """
        Publish each code suggestion as a comment thread anchored to its line range in the PR.

        Suggestions with a missing / -1 start line, or with an end line before the start line,
        are skipped with a warning. A failure to create a thread is logged and does not stop the
        remaining suggestions.

        Args:
            code_suggestions: list of dicts with the keys 'body', 'relevant_file',
                'relevant_lines_start' and 'relevant_lines_end'.

        Returns:
            bool: always True.
        """
        # Status given to every created thread; configurable, "closed" by default
        thread_status = get_settings().azure_devops.get("default_comment_status", "closed")

        for suggestion in code_suggestions:
            body = suggestion['body']
            file_path = suggestion['relevant_file']
            first_line = suggestion['relevant_lines_start']
            last_line = suggestion['relevant_lines_end']

            if not first_line or first_line == -1:
                get_logger().warning(
                    f"Failed to publish code suggestion, relevant_lines_start is {first_line}")
                continue

            if last_line < first_line:
                get_logger().warning(f"Failed to publish code suggestion, "
                                       f"relevant_lines_end is {last_line} and "
                                       f"relevant_lines_start is {first_line}")
                continue

            # Anchor the thread to the line range on the right-hand side of the diff
            anchor = CommentThreadContext(
                file_path=file_path,
                right_file_start=CommentPosition(offset=1, line=first_line),
                right_file_end=CommentPosition(offset=1, line=last_line))
            suggestion_comment = Comment(content=body, comment_type=1)
            new_thread = CommentThread(comments=[suggestion_comment], thread_context=anchor, status=thread_status)
            try:
                self.azure_devops_client.create_thread(
                    comment_thread=new_thread,
                    project=self.workspace_slug,
                    repository_id=self.repo_slug,
                    pull_request_id=self.pr_num
                )
            except Exception as e:
                get_logger().error(f"Azure failed to publish code suggestion, error: {e}", suggestion=suggestion)
        return True

    def reply_to_comment_from_comment_id(self, comment_id: int, body: str, is_temporary: bool = False) -> Comment:
        """
        Reply to an existing discussion by delegating to reply_to_thread().

        Args:
            comment_id: identifier of the thread to reply to (used as a thread id here).
            body: text of the reply.
            is_temporary: forwarded unchanged to reply_to_thread().

        Returns:
            Whatever reply_to_thread() returns.
        """
        # comment_id is actually thread_id
        return self.reply_to_thread(comment_id, body, is_temporary)

    def get_pr_description_full(self) -> str:
        """Return the ``description`` attribute of the loaded pull request object, unmodified."""
        return self.pr.description

    def edit_comment(self, comment: Comment, body: str):
        """
        Replace the content of an existing PR comment with ``body``.

        Args:
            comment: the comment to update; its ``thread_id`` and ``id`` identify it.
            body: the new comment text.

        Failures are logged and not re-raised.
        """
        try:
            self.azure_devops_client.update_comment(
                repository_id=self.repo_slug,
                pull_request_id=self.pr_num,
                thread_id=comment.thread_id,
                comment_id=comment.id,
                comment=Comment(content=body),
                project=self.workspace_slug,
            )
        except Exception as e:
            get_logger().exception(f"Failed to edit comment, error: {e}")

    def remove_comment(self, comment: Comment):
        """
        Delete a PR comment.

        Args:
            comment: the comment to delete; its ``thread_id`` and ``id`` identify it.

        Failures are logged and not re-raised.
        """
        try:
            self.azure_devops_client.delete_comment(
                repository_id=self.repo_slug,
                pull_request_id=self.pr_num,
                thread_id=comment.thread_id,
                comment_id=comment.id,
                project=self.workspace_slug,
            )
        except Exception as e:
            get_logger().exception(f"Failed to remove comment, error: {e}")

    def publish_labels(self, pr_types):
        """
        Add one label to the pull request for every entry of ``pr_types``.

        Args:
            pr_types: iterable of label names.

        The whole loop runs inside a single try block: the first failing label is logged as a
        warning and the remaining labels are not attempted.
        """
        try:
            for pr_type in pr_types:
                self.azure_devops_client.create_pull_request_label(
                    label={"name": pr_type},
                    project=self.workspace_slug,
                    repository_id=self.repo_slug,
                    pull_request_id=self.pr_num,
                )
        except Exception as e:
            get_logger().warning(f"Failed to publish labels, error: {e}")

    def get_pr_labels(self, update=False):
        """
        Return the names of the labels currently attached to the pull request.

        Args:
            update: not used by this implementation.

        Returns:
            list: label names, or an empty list when the lookup fails (the failure is logged).
        """
        try:
            labels = self.azure_devops_client.get_pull_request_labels(
                project=self.workspace_slug,
                repository_id=self.repo_slug,
                pull_request_id=self.pr_num,
            )
            return [label.name for label in labels]
        except Exception as e:
            get_logger().exception(f"Failed to get labels, error: {e}")
            return []

    def is_supported(self, capability: str) -> bool:
        """Report every capability as supported: returns True regardless of ``capability``."""
        return True

    def set_pr(self, pr_url: str):
        """
        Bind the provider to the pull request at ``pr_url``.

        Stores the URL, parses it into workspace slug, repository slug and PR number via
        _parse_pr_url(), then loads the PR object via _get_pr().
        """
        self.pr_url = pr_url
        self.workspace_slug, self.repo_slug, self.pr_num = self._parse_pr_url(pr_url)
        self.pr = self._get_pr()

    def get_repo_settings(self):
        """
        Fetch the repository's `.pr_agent.toml` through the Azure DevOps client.

        Returns:
            The complete file content (all returned chunks joined), or an empty string when the
            file cannot be retrieved. Failures are logged only at verbosity level 2 or higher.
        """
        try:
            contents = self.azure_devops_client.get_item_content(
                repository_id=self.repo_slug,
                project=self.workspace_slug,
                download=False,
                include_content_metadata=False,
                include_content=True,
                path=".pr_agent.toml",
            )
            # The client returns an iterable of content pieces (presumably streamed chunks -
            # confirm against the SDK). Taking only the first piece would truncate a settings
            # file that arrives in several chunks, so all of them are joined.
            chunks = list(contents)
            # chunks[0][:0] is an empty value of the chunk's own type (bytes or str), so the join
            # works for either; an empty response raises IndexError and is handled below, exactly
            # as it was before.
            return chunks[0][:0].join(chunks)
        except Exception as e:
            if get_settings().config.verbosity_level >= 2:
                get_logger().error(f"Failed to get repo settings, error: {e}")
            return ""

    def get_files(self):
        """
        Collect the paths touched by any commit of the pull request.

        Returns:
            list: the distinct item paths, in no particular order.
        """
        pr_commits = self.azure_devops_client.get_pull_request_commits(
            project=self.workspace_slug,
            repository_id=self.repo_slug,
            pull_request_id=self.pr_num,
        )

        touched_paths = set()
        for commit in pr_commits:
            commit_changes = self.azure_devops_client.get_changes(
                project=self.workspace_slug,
                repository_id=self.repo_slug,
                commit_id=commit.commit_id,
            )
            # A path changed by several commits is recorded once
            touched_paths.update(change["item"]["path"] for change in commit_changes.changes)
        return list(touched_paths)

    def get_diff_files(self) -> list[FilePatchInfo]:
        """
        Build (and cache) the list of files changed by the pull request.

        The changed paths and their change types are taken from the most recent PR
        iteration. Paths are passed through `filter_ignored` and `is_valid_file`; for each
        remaining path the content at `self.pr.last_merge_commit` (new side) and at
        `self.pr.last_merge_target_commit` (original side) is fetched and a patch between
        the two is produced with `load_large_diff`.

        Returns:
            list[FilePatchInfo]: one entry per processed file. A non-empty cached result
            in `self.diff_files` is returned as is. On any unexpected failure the error
            is logged and an empty list is returned.
        """
        try:

            if self.diff_files:
                return self.diff_files

            base_sha = self.pr.last_merge_target_commit
            head_sha = self.pr.last_merge_commit

            # Get PR iterations
            iterations = self.azure_devops_client.get_pull_request_iterations(
                repository_id=self.repo_slug,
                pull_request_id=self.pr_num,
                project=self.workspace_slug
            )
            changes = None
            if iterations:
                iteration_id = iterations[-1].id  # Get the last iteration (most recent changes)

                # Get changes for the iteration
                changes = self.azure_devops_client.get_pull_request_iteration_changes(
                    repository_id=self.repo_slug,
                    pull_request_id=self.pr_num,
                    iteration_id=iteration_id,
                    project=self.workspace_slug
                )
            diff_files = []
            diffs = []
            diff_types = {}
            if changes:
                for change in changes.change_entries:
                    item = change.additional_properties.get('item', {})
                    path = item.get('path', None)
                    if path:
                        diffs.append(path)
                        diff_types[path] = change.additional_properties.get('changeType', 'Unknown')

            # The iteration changes are used on purpose: collecting the changes of every
            # PR commit instead would report all files changed in any commit of the PR.

            diffs_original = diffs
            diffs = filter_ignored(diffs_original, 'azure')
            if diffs_original != diffs:
                try:
                    get_logger().info("Filtered out [ignore] files for pull request:", extra=
                    {"files": diffs_original,  # diffs is just a list of names
                     "filtered_files": diffs})
                except Exception:
                    pass

            invalid_files_names = []
            for file in diffs:
                if not is_valid_file(file):
                    invalid_files_names.append(file)
                    continue

                # Classify the change first so that content is only requested for the
                # sides of the diff on which the file can exist.
                change_type = diff_types[file]
                edit_type = EDIT_TYPE.MODIFIED
                if change_type == "add":
                    edit_type = EDIT_TYPE.ADDED
                elif change_type == "delete":
                    edit_type = EDIT_TYPE.DELETED
                elif "rename" in change_type:  # diff_type can be `rename` | `edit, rename`
                    edit_type = EDIT_TYPE.RENAMED

                if edit_type == EDIT_TYPE.DELETED:
                    # A deleted file has no content at the head commit; requesting it
                    # would only produce a failing call and an error log entry.
                    new_file_content_str = ""
                else:
                    new_file_content_str = self._get_file_content_at_commit(file, head_sha, "new")

                if edit_type == EDIT_TYPE.ADDED or edit_type == EDIT_TYPE.RENAMED:
                    original_file_content_str = ""
                else:
                    original_file_content_str = self._get_file_content_at_commit(file, base_sha, "original")

                patch = load_large_diff(
                    file, new_file_content_str, original_file_content_str, show_warning=False
                ).rstrip()

                # count number of lines added and removed
                patch_lines = patch.splitlines(keepends=True)
                num_plus_lines = len([line for line in patch_lines if line.startswith('+')])
                num_minus_lines = len([line for line in patch_lines if line.startswith('-')])

                diff_files.append(
                    FilePatchInfo(
                        original_file_content_str,
                        new_file_content_str,
                        patch=patch,
                        filename=file,
                        edit_type=edit_type,
                        num_plus_lines=num_plus_lines,
                        num_minus_lines=num_minus_lines,
                    )
                )
            get_logger().info(f"Invalid files: {invalid_files_names}")

            self.diff_files = diff_files
            return diff_files
        except Exception as e:
            get_logger().exception(f"Failed to get diff files, error: {e}")
            return []

    def _get_file_content_at_commit(self, file: str, commit, side: str) -> str:
        """
        Return the content of `file` at `commit`, or "" when it cannot be retrieved.

        `commit` must expose a `commit_id` attribute; `side` ("new" / "original") is only
        used to word the error message.
        """
        version = GitVersionDescriptor(version=commit.commit_id, version_type="commit")
        try:
            item = self.azure_devops_client.get_item(
                repository_id=self.repo_slug,
                path=file,
                project=self.workspace_slug,
                version_descriptor=version,
                download=False,
                include_content=True,
            )
            return item.content
        except Exception as error:
            # NOTE(review): the error is embedded in the message rather than passed as a
            # keyword argument. Some logging backends (e.g. loguru) treat extra arguments
            # as `str.format` inputs, and the message may contain literal braces - confirm
            # against the project's logger.
            get_logger().error(
                f"Failed to retrieve {side} file content of {file} at commit {commit.commit_id}, error: {error}"
            )
            return ""

    def publish_comment(self, pr_comment: str, is_temporary: bool = False, thread_context=None) -> Comment:
        """
        Post `pr_comment` as the first comment of a new thread on the pull request.

        Temporary comments are skipped (None is returned) when the
        `config.publish_output_progress` setting is disabled; otherwise they are also
        recorded in `self.temp_comments`. The thread is created with the status found
        under `azure_devops.default_comment_status` ("closed" when not configured).

        Returns:
            The created comment, with a `thread_id` attribute holding the id of the new
            thread, or None when the comment was skipped.
        """
        if is_temporary:
            if not get_settings().config.publish_output_progress:
                get_logger().debug(f"Skipping publish_comment for temporary comment: {pr_comment}")
                return None

        first_comment = Comment(content=pr_comment)
        thread_status = get_settings().azure_devops.get("default_comment_status", "closed")
        new_thread = CommentThread(
            comments=[first_comment],
            thread_context=thread_context,
            status=thread_status,
        )
        response = self.azure_devops_client.create_thread(
            comment_thread=new_thread,
            project=self.workspace_slug,
            repository_id=self.repo_slug,
            pull_request_id=self.pr_num,
        )

        published = response.comments[0]
        published.thread_id = response.id
        if is_temporary:
            self.temp_comments.append(published)
        return published

    def publish_persistent_comment(self, pr_comment: str,
                                   initial_header: str,
                                   update_header: bool = True,
                                   name='review',
                                   final_update_message=True):
        """Delegate to `publish_persistent_comment_full`, forwarding every argument unchanged."""
        forwarded = (pr_comment, initial_header, update_header, name, final_update_message)
        return self.publish_persistent_comment_full(*forwarded)

    def publish_description(self, pr_title: str, pr_body: str):
        """
        Update the pull request's title and description.

        When `pr_body` exceeds `MAX_PR_DESCRIPTION_AZURE_LENGTH` it is shortened in up to
        three steps, each applied only while the body is still too long: drop everything
        from the usage-guide section on, drop everything from the file-walkthrough header
        on, and finally hard-truncate with a trailing notice. Failures of the update call
        are logged, not raised.
        """
        limit = MAX_PR_DESCRIPTION_AZURE_LENGTH
        if len(pr_body) > limit:
            # Step 1: cut the usage guide section, if present.
            guide_marker = '<details> <summary><strong>✨ Describe tool usage guide:</strong></summary><hr>'
            guide_start = pr_body.find(guide_marker)
            if guide_start != -1:
                pr_body = pr_body[:guide_start]

            # Step 2: cut the file walkthrough section, if present.
            if len(pr_body) > limit:
                walkthrough_start = pr_body.find(PRDescriptionHeader.FILE_WALKTHROUGH.value)
                if walkthrough_start != -1:
                    pr_body = pr_body[:walkthrough_start]

            # Step 3: hard truncation, leaving room for the notice itself.
            if len(pr_body) > limit:
                notice = " ... (description truncated due to length limit)"
                pr_body = pr_body[:limit - len(notice)] + notice
                get_logger().warning("PR description was truncated due to length limit")

        try:
            pr_update = GitPullRequest()
            pr_update.title = pr_title
            pr_update.description = pr_body
            self.azure_devops_client.update_pull_request(
                git_pull_request_to_update=pr_update,
                project=self.workspace_slug,
                repository_id=self.repo_slug,
                pull_request_id=self.pr_num,
            )
        except Exception as e:
            get_logger().exception(
                f"Could not update pull request {self.pr_num} description: {e}"
            )

    def remove_initial_comment(self):
        """
        Remove every temporary comment recorded in `self.temp_comments`.

        Each removal is attempted independently, so a failure on one comment is logged
        and does not prevent the remaining temporary comments from being removed.
        This method does not raise.
        """
        try:
            # Snapshot the list so the iteration is unaffected if it changes meanwhile.
            pending = list(self.temp_comments)
        except Exception as e:
            get_logger().exception(f"Failed to remove temp comments, error: {e}")
            return

        for comment in pending:
            try:
                self.remove_comment(comment)
            except Exception as e:
                get_logger().exception(f"Failed to remove temp comments, error: {e}")

    def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, original_suggestion=None):
        """
        Publish a single inline comment.

        The comment is built with `create_inline_comment` and handed to
        `publish_inline_comments` as a one-element list. `original_suggestion` is not used.
        """
        inline_comment = self.create_inline_comment(body, relevant_file, relevant_line_in_file)
        self.publish_inline_comments([inline_comment])

    def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str,
                              absolute_position: int = None):
        """
        Build the dictionary describing an inline comment on a line of the diff.

        The line is located with `find_line_number_of_relevant_line_in_file` (backticks
        are stripped from `relevant_file` for the lookup). When no position is found an
        empty dict is returned; otherwise the dict carries `body`, `path` (the
        whitespace-stripped `relevant_file`), `position` and `absolute_position`.
        """
        position, absolute_position = find_line_number_of_relevant_line_in_file(
            self.get_diff_files(),
            relevant_file.strip('`'),
            relevant_line_in_file,
            absolute_position,
        )
        if position == -1:
            if get_settings().config.verbosity_level >= 2:
                get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}")
            return {}

        return dict(
            body=body,
            path=relevant_file.strip(),
            position=position,
            absolute_position=absolute_position,
        )

    def publish_inline_comments(self, comments: list[dict], disable_fallback: bool = False):
        """
        Publish each inline comment as its own thread anchored to a file position.

        Every entry must provide `body`, `path`, `absolute_position` and `position`. The
        thread context uses `absolute_position` as the line and `position` as the offset
        for both the start and the end of the right-hand side. `disable_fallback` is not
        used.

        Returns:
            True when every comment was published, False when at least one failed
            (failures are logged only at verbosity level 2 or higher).
        """
        all_published = True
        for inline in comments:
            try:
                right_start = {
                    "line": inline["absolute_position"],
                    "offset": inline["position"],
                }
                right_end = {
                    "line": inline["absolute_position"],
                    "offset": inline["position"],
                }
                context_for_thread = {
                    "filePath": inline["path"],
                    "rightFileStart": right_start,
                    "rightFileEnd": right_end,
                }
                self.publish_comment(inline["body"], thread_context=context_for_thread)
                if get_settings().config.verbosity_level >= 2:
                    get_logger().info(
                        f"Published code suggestion on {self.pr_num} at {inline['path']}"
                    )
            except Exception as e:
                if get_settings().config.verbosity_level >= 2:
                    get_logger().error(f"Failed to publish code suggestion, error: {e}")
                all_published = False
        return all_published

    def get_title(self):
        """Return the title of the currently loaded pull request (`self.pr.title`)."""
        pull_request = self.pr
        return pull_request.title

    def get_languages(self):
        """
        Compute the share of each file extension among the repository's files.

        All items of the repository are listed recursively; only items whose
        `git_object_type` is "blob" and whose path has a non-empty extension are counted.

        Returns:
            dict mapping extension (without the leading dot) to its percentage of the
            counted files. Empty when no file has an extension.
        """
        repo_items = self.azure_devops_client.get_items(
            project=self.workspace_slug,
            repository_id=self.repo_slug,
            recursion_level="Full",
            include_content_metadata=True,
            include_links=False,
            download=False,
        )

        extension_counts = {}
        for repo_item in repo_items:
            if repo_item.git_object_type != "blob":
                continue
            extension = os.path.splitext(repo_item.path)[1][1:]  # drop the leading "."
            if extension == "":
                continue
            extension_counts[extension] = extension_counts.get(extension, 0) + 1

        total_counted = sum(extension_counts.values())

        percentages = {}
        for extension, count in extension_counts.items():
            percentages[extension] = (count / total_counted) * 100
        return percentages

    def get_pr_branch(self):
        """
        Return the name of the pull request's source branch.

        The pull request is re-fetched and its `source_ref_name` is reduced to the branch
        name. For refs of the form ``refs/heads/<branch>`` the whole ``<branch>`` part is
        returned, so branch names that contain "/" (e.g. ``feature/login``) are kept
        intact. Any other ref falls back to its last "/"-separated component.
        """
        pr_info = self.azure_devops_client.get_pull_request_by_id(
            project=self.workspace_slug, pull_request_id=self.pr_num
        )
        source_ref = pr_info.source_ref_name
        heads_prefix = "refs/heads/"
        if source_ref.startswith(heads_prefix):
            # Taking only the last path component would truncate "feature/login" to "login".
            return source_ref[len(heads_prefix):]
        return source_ref.split("/")[-1]

    def get_user_id(self):
        """Return the constant 0; no user lookup is performed by this provider."""
        fixed_user_id = 0
        return fixed_user_id

    def get_issue_comments(self) -> list[Comment]:
        """
        Return the comments of all threads of the pull request.

        Threads are visited in the reverse of the order returned by the client (the
        returned list is reversed in place). Comments without content, and comments equal
        to one already collected, are skipped. Each returned comment gets two extra
        attributes: `body` (a copy of `content`) and `thread_id`.
        """
        pr_threads = self.azure_devops_client.get_threads(
            repository_id=self.repo_slug,
            pull_request_id=self.pr_num,
            project=self.workspace_slug,
        )
        pr_threads.reverse()

        collected = []
        for pr_thread in pr_threads:
            for thread_comment in pr_thread.comments:
                if not thread_comment.content:
                    continue
                if thread_comment in collected:
                    continue
                thread_comment.body = thread_comment.content
                thread_comment.thread_id = pr_thread.id
                collected.append(thread_comment)
        return collected

    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:
        """No-op for this provider: always returns True, both arguments are ignored."""
        acknowledged = True
        return acknowledged

    def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool:
        """No-op for this provider: always returns True, both arguments are ignored."""
        removed = True
        return removed

    def set_like(self, thread_id: int, comment_id: int, create: bool = True):
        """
        Add (`create=True`) or remove (`create=False`) a like on a comment of a thread.
        """
        if create:
            like_action = self.azure_devops_client.create_like
        else:
            like_action = self.azure_devops_client.delete_like
        like_action(self.repo_slug, self.pr_num, thread_id, comment_id, project=self.workspace_slug)
            
    def set_thread_status(self, thread_id: int, status: str):
        """
        Set the status of thread `thread_id` on the pull request to `status`.

        Failures are logged and not raised.
        """
        try:
            status_update = CommentThread(status=status)
            self.azure_devops_client.update_thread(
                status_update, self.repo_slug, self.pr_num, thread_id, self.workspace_slug
            )
        except Exception as e:
            get_logger().exception(f"Failed to set thread status, error: {e}")
            
    def reply_to_thread(self, thread_id: int, body: str, is_temporary: bool = False) -> Comment:
        """
        Add a comment with text `body` to the existing thread `thread_id`.

        The created comment gets a `thread_id` attribute and, when `is_temporary` is set,
        is recorded in `self.temp_comments`. On failure the error is logged and None is
        returned implicitly.
        """
        try:
            reply = self.azure_devops_client.create_comment(
                Comment(content=body), self.repo_slug, self.pr_num, thread_id, self.workspace_slug
            )
            reply.thread_id = thread_id
            if is_temporary:
                self.temp_comments.append(reply)
            return reply
        except Exception as e:
            get_logger().exception(f"Failed to reply to thread, error: {e}")
    
    def get_thread_context(self, thread_id: int) -> CommentThreadContext:
        """
        Return the `thread_context` of thread `thread_id` on the pull request.

        On failure the error is logged and None is returned.
        """
        try:
            thread = self.azure_devops_client.get_pull_request_thread(self.repo_slug, self.pr_num, thread_id, self.workspace_slug)
            return thread.thread_context
        except Exception as e:
            # The message previously read "Failed to set thread status", which pointed
            # at the wrong operation when diagnosing failures.
            get_logger().exception(f"Failed to get thread context, error: {e}")
            return None
    
    @staticmethod
    def _parse_pr_url(pr_url: str) -> Tuple[str, str, int]:
        parsed_url = urlparse(pr_url)
        path_parts = parsed_url.path.strip("/").split("/")
        num_parts = len(path_parts)
        if num_parts < 5:
            raise ValueError("The provided URL has insufficient path components for an Azure DevOps PR URL")
        
        # Verify that the second-to-last path component is "pullrequest"
        if path_parts[num_parts - 2] != "pullrequest":
            raise ValueError("The provided URL does not follow the expected Azure DevOps PR URL format")

        workspace_slug = path_parts[num_parts - 5]
        repo_slug = path_parts[num_parts - 3]
        try:
            pr_number = int(path_parts[num_parts - 1])
        except ValueError as e:
            raise ValueError("Cannot parse PR number in the provided URL") from e

        return workspace_slug, repo_slug, pr_number

    @staticmethod
    def _get_azure_devops_client() -> Tuple[GitClient, WorkItemTrackingClient]:
        """
        Create the Azure DevOps git client and work item tracking client.

        The organization URL is read from `azure_devops.org` (required). Authentication
        uses `azure_devops.pat` when set; otherwise a token is requested through
        `DefaultAzureCredential`.

        Returns:
            (git client, work item tracking client)

        Raises:
            ValueError: no organization is configured.
            Exception: re-raised when no PAT is configured and default credentials fail.
        """
        org = get_settings().azure_devops.get("org", None)
        pat = get_settings().azure_devops.get("pat", None)

        if not org:
            raise ValueError("Azure DevOps organization is required")

        auth_token = pat
        if not pat:
            try:
                # try to use azure default credentials
                # see https://learn.microsoft.com/en-us/python/api/overview/azure/identity-readme?view=azure-python
                # for usage and env var configuration of user-assigned managed identity, local machine auth etc.
                get_logger().info("No PAT found in settings, trying to use Azure Default Credentials.")
                default_credential = DefaultAzureCredential()
                auth_token = default_credential.get_token(ADO_APP_CLIENT_DEFAULT_ID).token
            except Exception as e:
                get_logger().error(f"No PAT found in settings, and Azure Default Authentication failed, error: {e}")
                raise

        connection = Connection(base_url=org, creds=BasicAuthentication("", auth_token))
        git_client = connection.clients.get_git_client()
        board_client = connection.clients.get_work_item_tracking_client()

        return git_client, board_client

    def _get_repo(self):
        if self.repo is None:
            self.repo = self.azure_devops_client.get_repository(
                project=self.workspace_slug, repository_id=self.repo_slug
            )
        return self.repo

    def _get_pr(self):
        self.pr = self.azure_devops_client.get_pull_request_by_id(
            pull_request_id=self.pr_num, project=self.workspace_slug
        )
        return self.pr

    def get_commit_messages(self):
        """Return an empty string; commit messages are not implemented yet for this provider."""
        not_implemented_result = ""  # not implemented yet
        return not_implemented_result

    def get_pr_id(self):
        """
        Return an identifier of the form ``<project>/<repository>/<pr number>``.

        If the identifier cannot be built, "" is returned (the failure is logged only at
        verbosity level 2 or higher).
        """
        try:
            return "{}/{}/{}".format(self.workspace_slug, self.repo_slug, self.pr_num)
        except Exception as e:
            if get_settings().config.verbosity_level >= 2:
                get_logger().info(f"Failed to get PR id, error: {e}")
            return ""

    def publish_file_comments(self, file_comments: list) -> bool:
        """Not implemented for this provider: does nothing and returns None."""
        return None

    def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str:
        """
        Return a link to `relevant_file` in the pull request's files view.

        The line arguments are accepted but not encoded in the link.
        """
        files_query = f"?_a=files&path={relevant_file}"
        return self.pr_url + files_query

    def get_comment_url(self, comment) -> str:
        """Return the PR URL with a `discussionId` query pointing at the comment's thread."""
        discussion_query = "?discussionId=" + str(comment.thread_id)
        return self.pr_url + discussion_query

    def get_latest_commit_url(self) -> str:
        """
        Return the web URL of the first commit the client lists for the pull request.

        NOTE(review): the first listed commit is presumably the most recent one - confirm
        against the ordering the API guarantees.
        """
        client = self.azure_devops_client
        pr_commits = client.get_pull_request_commits(self.repo_slug, self.pr_num, self.workspace_slug)
        first_listed = pr_commits[0]
        repo_url = client.normalized_url + "/" + self.workspace_slug + "/_git/" + self.repo_slug
        return repo_url + "/commit/" + first_listed.commit_id

    def get_linked_work_items(self) -> list:
        """
        Return the work items linked to the pull request.

        The work item references of the PR are fetched and resolved through
        `get_work_items`. An empty list is returned when there are no references or when
        anything fails (failures are logged).
        """
        try:
            item_refs = self.azure_devops_client.get_pull_request_work_item_refs(
                pull_request_id=self.pr_num,
                repository_id=self.repo_slug,
                project=self.workspace_slug,
            )
            linked_ids = []
            for item_ref in item_refs:
                linked_ids.append(item_ref.id)
            if not item_refs:
                return []
            return self.get_work_items(linked_ids)
        except Exception as e:
            get_logger().exception(f"Failed to get linked work items, error: {e}")
            return []

    def get_work_items(self, work_item_ids: list) -> list:
        """
        Fetch the given work items and convert each into a plain dictionary.

        Each dictionary has the keys `id`, `title`, `url`, `body`, `acceptance_criteria`
        and `tags`; text fields missing from the work item default to "" and `tags` is
        the "System.Tags" value split on "; " (an empty list when absent or empty).
        On failure the error is logged and an empty list is returned.
        """
        try:
            fetched_items = self.azure_devops_board_client.get_work_items(
                project=self.workspace_slug,
                ids=work_item_ids,
            )
            converted = []
            for fetched in fetched_items:
                raw_tags = fetched.fields.get("System.Tags")
                tags = raw_tags.split("; ") if raw_tags else []
                entry = {
                    "id": fetched.id,
                    "title": fetched.fields.get("System.Title", ""),
                    "url": fetched.url,
                    "body": fetched.fields.get("System.Description", ""),
                    "acceptance_criteria": fetched.fields.get(
                        "Microsoft.VSTS.Common.AcceptanceCriteria", ""
                    ),
                    "tags": tags,
                }
                converted.append(entry)
            return converted
        except Exception as e:
            get_logger().exception(f"Failed to get work items, error: {e}")
            return []


================================================
FILE: pr_agent/git_providers/bitbucket_provider.py
================================================
import difflib
import json
import re
from typing import Optional, Tuple
from urllib.parse import urlparse

import requests
from atlassian.bitbucket import Cloud
from starlette_context import context

from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo

from ..algo.file_filter import filter_ignored
from ..algo.language_handler import is_valid_file
from ..algo.utils import find_line_number_of_relevant_line_in_file
from ..config_loader import get_settings
from ..log import get_logger
from .git_provider import MAX_FILES_ALLOWED_FULL, GitProvider


def _gef_filename(diff):
    if diff.new.path:
        return diff.new.path
    return diff.old.path


class BitbucketProvider(GitProvider):
    def __init__(
        self, pr_url: Optional[str] = None, incremental: Optional[bool] = False
    ):
        """
        Set up an authenticated Bitbucket client and, optionally, load a pull request.

        Authentication is selected by the `BITBUCKET.AUTH_TYPE` setting ("bearer" by
        default): "basic" uses `BITBUCKET.BASIC_TOKEN`; "bearer" uses the
        `bitbucket_bearer_token` value of the request context when available and falls
        back to `BITBUCKET.BEARER_TOKEN`.

        Args:
            pr_url: URL of the pull request to load. When omitted, no pull request is
                loaded and the PR-specific API URLs stay None.
            incremental: stored on the instance as `self.incremental`.

        Raises:
            ValueError: unsupported auth type or missing token (logged, then re-raised).
        """
        s = requests.Session()
        s.headers["Content-Type"] = "application/json"

        self.auth_type = get_settings().get("BITBUCKET.AUTH_TYPE", "bearer")

        try:
            def get_token(token_name, auth_type_name):
                token = get_settings().get(f"BITBUCKET.{token_name.upper()}", None)
                if not token:
                    raise ValueError(f"{auth_type_name} auth requires a token")
                return token

            if self.auth_type == "basic":
                self.basic_token = get_token("basic_token", "Basic")
                s.headers["Authorization"] = f"Basic {self.basic_token}"
            elif self.auth_type == "bearer":
                try:
                    self.bearer_token = context.get("bitbucket_bearer_token", None)
                except Exception:
                    # The context lookup is best effort (it can fail, e.g. outside a
                    # request); fall back to the configured token below.
                    self.bearer_token = None

                if not self.bearer_token:
                    self.bearer_token = get_token("bearer_token", "Bearer")
                s.headers["Authorization"] = f"Bearer {self.bearer_token}"
            else:
                 raise ValueError(f"Unsupported auth_type: {self.auth_type}")

        except Exception as e:
            get_logger().exception(f"Failed to initialize Bitbucket authentication: {e}")
            raise

        self.headers = s.headers
        self.bitbucket_client = Cloud(session=s)
        self.max_comment_length = 31000
        self.workspace_slug = None
        self.repo_slug = None
        self.repo = None
        self.pr_num = None
        self.pr = None
        self.pr_url = pr_url
        self.temp_comments = []
        self.incremental = incremental
        self.diff_files = None
        self.git_files = None
        self.bitbucket_comment_api_url = None
        self.bitbucket_pull_request_api_url = None
        if pr_url:
            self.set_pr(pr_url)
        if self.pr is not None:
            # The API links only exist once a pull request is loaded; without this guard
            # constructing the provider with no `pr_url` failed on `None`.
            pr_links = self.pr._BitbucketBase__data["links"]
            self.bitbucket_comment_api_url = pr_links["comments"]["href"]
            self.bitbucket_pull_request_api_url = pr_links['self']['href']

    def get_repo_settings(self):
        """
        Fetch `.pr_agent.toml` from the pull request's destination branch.

        Returns:
            The file content encoded as UTF-8 bytes, or "" when the file does not exist,
            the request is answered with an error status, or the request fails.
        """
        try:
            url = (f"https://api.bitbucket.org/2.0/repositories/{self.workspace_slug}/{self.repo_slug}/src/"
                   f"{self.pr.destination_branch}/.pr_agent.toml")
            response = requests.request("GET", url, headers=self.headers)
            if response.status_code == 404:  # not found
                return ""
            if not response.ok:
                # Any other error status carries an error payload, not the settings
                # file; returning it would feed that payload to the settings loader.
                get_logger().warning(
                    f"Failed to get repo settings, status code: {response.status_code}"
                )
                return ""
            contents = response.text.encode('utf-8')
            return contents
        except Exception:
            return ""

    def get_git_repo_url(self, pr_url: str=None) -> str: #bitbucket does not support issue url, so ignore param
        """
        Build the `.git` URL of the repository from the stored PR URL and slugs.

        `pr_url` is ignored; `self.pr_url` supplies scheme and host. Returns "" (after
        logging) if the URL cannot be built.
        """
        try:
            url_parts = urlparse(self.pr_url)
            host_root = f"{url_parts.scheme}://{url_parts.netloc}"
            return f"{host_root}/{self.workspace_slug}/{self.repo_slug}.git"
        except Exception as e:
            get_logger().exception(f"url is not a valid merge requests url: {self.pr_url}")
            return ""

    # Given a git repo url, return prefix and suffix of the provider in order to view a given file belonging to that repo.
    # Example: git clone https://bitbucket.org/codiumai/pr-agent.git and branch: main -> prefix: "https://bitbucket.org/codiumai/pr-agent/src/main", suffix: ""
    # In case git url is not provided, provider will use PR context (which includes branch) to determine the prefix and suffix.
    def get_canonical_url_parts(self, repo_git_url:str=None, desired_branch:str=None) -> Tuple[str, str]:
        """
        Return the (prefix, suffix) pair used to build a link to a file of a repository.

        Args:
            repo_git_url: git URL of the repository (``<scheme>://<host>/<workspace>/<repo>.git``).
                When omitted, the workspace, repository and host come from the current PR
                and the branch is the repository's default branch.
            desired_branch: branch to link to; only used together with `repo_git_url`.

        Returns:
            ``("<scheme>://<host>/<workspace>/<repo>/src/<branch>", "")``, or ``("", "")``
            when `repo_git_url` does not have a ``<workspace>/<repo>`` path.
        """
        scheme_and_netloc = None
        if repo_git_url:
            parsed_git_url = urlparse(repo_git_url)
            scheme_and_netloc = parsed_git_url.scheme + "://" + parsed_git_url.netloc
            # /<workspace>/<repo>.git -> <workspace>/<repo>
            # Cut only at a ".git" that ends a path segment, so repository names that
            # merely contain ".git" (e.g. "site.github.io") are kept whole.
            repo_path = re.split(r'\.git(?=/|$)', parsed_git_url.path, maxsplit=1)[0][1:]
            if repo_path.count('/') != 1:
                get_logger().error(f"repo_git_url is not a valid git repo url: {repo_git_url}")
                return ("", "")
            workspace_name, project_name = repo_path.split('/')
        else:
            desired_branch = self.get_repo_default_branch()
            parsed_pr_url = urlparse(self.pr_url)
            scheme_and_netloc = parsed_pr_url.scheme + "://" + parsed_pr_url.netloc
            workspace_name, project_name = (self.workspace_slug, self.repo_slug)
        prefix = f"{scheme_and_netloc}/{workspace_name}/{project_name}/src/{desired_branch}"
        suffix = "" #None
        return (prefix, suffix)


    def publish_code_suggestions(self, code_suggestions: list) -> bool:
        """
        Publish code suggestions as inline comments on the PR.

        For a suggestion carrying `original_suggestion`, the fenced ``suggestion`` block in
        its body is replaced by a fenced diff between the existing and the improved code.
        Suggestions whose diff cannot be built, whose start line is missing or -1, or
        whose end line precedes the start line are logged and skipped.

        Returns:
            True when the collected comments were handed to `publish_inline_comments`
            without an exception, False otherwise.
        """
        post_parameters_list = []
        for suggestion in code_suggestions:
            body = suggestion["body"]
            original_suggestion = suggestion.get('original_suggestion', None)  # needed for diff code
            if original_suggestion:
                try:
                    existing_lines = (original_suggestion['existing_code'].rstrip() + "\n").split('\n')
                    improved_lines = (original_suggestion['improved_code'].rstrip() + "\n").split('\n')
                    full_patch = "\n".join(difflib.unified_diff(existing_lines, improved_lines, n=999))
                    # skip the unified-diff header lines, keep only the hunk content
                    patch = "\n".join(full_patch.splitlines()[5:]).strip('\n')
                    diff_code = f"\n\n```diff\n{patch.rstrip()}\n```"
                    # replace ```suggestion ... ``` with diff_code, using regex:
                    body = re.sub(r'```suggestion.*?```', diff_code, body, flags=re.DOTALL)
                except Exception as e:
                    get_logger().exception(f"Bitbucket failed to get diff code for publishing, error: {e}")
                    continue

            relevant_file = suggestion["relevant_file"]
            relevant_lines_start = suggestion["relevant_lines_start"]
            relevant_lines_end = suggestion["relevant_lines_end"]

            if not relevant_lines_start or relevant_lines_start == -1:
                get_logger().exception(
                    f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}"
                )
                continue

            if relevant_lines_end < relevant_lines_start:
                get_logger().exception(
                    f"Failed to publish code suggestion, "
                    f"relevant_lines_end is {relevant_lines_end} and "
                    f"relevant_lines_start is {relevant_lines_start}"
                )
                continue

            post_parameters = {"body": body, "path": relevant_file}
            if relevant_lines_end > relevant_lines_start:
                post_parameters.update(
                    line=relevant_lines_end,
                    start_line=relevant_lines_start,
                    start_side="RIGHT",
                )
            else:  # API is different for single line comments
                post_parameters.update(line=relevant_lines_start, side="RIGHT")
            post_parameters_list.append(post_parameters)

        try:
            self.publish_inline_comments(post_parameters_list)
        except Exception as e:
            get_logger().error(f"Bitbucket failed to publish code suggestion, error: {e}")
            return False
        return True

    def publish_file_comments(self, file_comments: list) -> bool:
        """Not implemented for this provider: does nothing and returns None."""
        return None

    def is_supported(self, capability: str) -> bool:
        """Return False for the capabilities this provider lists as unsupported, True otherwise."""
        unsupported = (
            'get_issue_comments',
            'publish_inline_comments',
            'get_labels',
            'gfm_markdown',
            'publish_file_comments',
        )
        return capability not in unsupported

    def set_pr(self, pr_url: str):
        """
        Point the provider at the pull request identified by `pr_url`.

        Stores the URL together with the workspace, repository and PR number parsed from
        it by `_parse_pr_url`, then loads the pull request through `_get_pr()`.
        """
        # Keep the stored URL in sync with the loaded PR: other methods derive the
        # host and repository links from `self.pr_url`.
        self.pr_url = pr_url
        self.workspace_slug, self.repo_slug, self.pr_num = self._parse_pr_url(pr_url)
        self.pr = self._get_pr()

    def get_files(self):
        """
        Return the file names touched by the pull request.

        A non-empty `git_files` entry of the request context is returned directly.
        Otherwise the names are computed from the PR's diffstat, stored in
        `self.git_files` and written to the context. If anything in that path raises,
        the names already held in `self.git_files` are returned, computing them from the
        diffstat first when they are missing.
        """
        try:
            cached_files = context.get("git_files", None)
            if cached_files:
                return cached_files
            self.git_files = list(map(_gef_filename, self.pr.diffstat()))
            context["git_files"] = self.git_files
            return self.git_files
        except Exception:
            if not self.git_files:
                self.git_files = list(map(_gef_filename, self.pr.diffstat()))
            return self.git_files

    def get_diff_files(self) -> list[FilePatchInfo]:
        """Build (and cache) one FilePatchInfo per changed file of the PR.

        Steps, in order:
          1. Return ``self.diff_files`` if it is already populated.
          2. Read the PR diffstat and drop entries rejected by ``filter_ignored``.
          3. Fetch the whole PR diff as text, retrying with alternative encodings
             if the first attempt raises.
          4. Split the diff text per file on "diff --git" and strip each file's header.
          5. For every remaining diffstat entry with a valid file name, optionally
             download old/new file contents and assemble a FilePatchInfo.

        Returns:
            The list of FilePatchInfo objects (also stored in ``self.diff_files``),
            or an empty list when the diff text cannot be split into exactly one
            part per diffstat entry.

        Raises:
            ValueError: if the PR diff could not be obtained with any of the
                fallback encodings.
        """
        if self.diff_files:
            return self.diff_files

        diffs_original = list(self.pr.diffstat())
        diffs = filter_ignored(diffs_original, 'bitbucket')
        if diffs != diffs_original:
            # Something was filtered out: log what was kept and what was dropped.
            # The logging is best-effort; any failure while building it is ignored.
            try:
                names_original = [d.new.path for d in diffs_original]
                names_kept = [d.new.path for d in diffs]
                names_filtered = list(set(names_original) - set(names_kept))
                get_logger().info(f"Filtered out [ignore] files for PR", extra={
                    'original_files': names_original,
                    'names_kept': names_kept,
                    'names_filtered': names_filtered

                })
            except Exception as e:
                pass

        # get the pr patches
        try:
            pr_patches = self.pr.diff()
        except Exception as e:
            # Try different encodings if UTF-8 fails
            get_logger().warning(f"Failed to decode PR patch with utf-8, error: {e}")
            encodings_to_try = ['iso-8859-1', 'latin-1', 'ascii', 'utf-16']
            pr_patches = None
            for encoding in encodings_to_try:
                try:
                    pr_patches = self.pr.diff(encoding=encoding)
                    get_logger().info(f"Successfully decoded PR patch with encoding {encoding}")
                    break
                except UnicodeDecodeError:
                    # Only decode errors move on to the next encoding; any other
                    # exception raised by diff() propagates to the caller.
                    continue

            if pr_patches is None:
                raise ValueError(f"Failed to decode PR patch with encodings {encodings_to_try}")

        # One element per file section; the separator is re-attached because split() removes it.
        diff_split = ["diff --git" + x for x in pr_patches.split("diff --git") if x.strip()]
        # filter all elements of 'diff_split' that are of indices in 'diffs_original' that are not in 'diffs'
        if len(diff_split) > len(diffs) and len(diffs_original) == len(diff_split):
            diff_split = [diff_split[i] for i in range(len(diff_split)) if diffs_original[i] in diffs]
        if len(diff_split) != len(diffs):
            # Sections and diffstat entries are paired by index below, so a count
            # mismatch makes the pairing unusable.
            get_logger().error(f"Error - failed to split the diff into {len(diffs)} parts")
            return []
        # bitbucket diff has a header for each file, we need to remove it:
        # "diff --git filename
        # new file mode 100644 (optional)
        #  index caa56f0..61528d7 100644
        #   --- a/pr_agent/cli_pip.py
        #  +++ b/pr_agent/cli_pip.py
        #   @@ -... @@"
        for i, _ in enumerate(diff_split):
            diff_split_lines = diff_split[i].splitlines()
            # Two accepted header layouts: '---' / '+++' / '@@' at line indices 2/3/4,
            # or shifted by one (3/4/5) when an extra mode line is present.
            if (len(diff_split_lines) >= 6) and \
                    ((diff_split_lines[2].startswith("---") and
                      diff_split_lines[3].startswith("+++") and
                      diff_split_lines[4].startswith("@@")) or
                     (diff_split_lines[3].startswith("---") and  # new or deleted file
                      diff_split_lines[4].startswith("+++") and
                      diff_split_lines[5].startswith("@@"))):
                # NOTE(review): in the shifted layout the slice from index 4 still
                # begins with the '+++' line rather than the '@@' hunk header - confirm
                # this is intended.
                diff_split[i] = "\n".join(diff_split_lines[4:])
            else:
                # No recognizable header: the patch for this file is blanked out in
                # every case; the branches differ only in what gets logged.
                if diffs[i].data.get('lines_added', 0) == 0 and diffs[i].data.get('lines_removed', 0) == 0:
                    diff_split[i] = ""
                elif len(diff_split_lines) <= 3:
                    diff_split[i] = ""
                    get_logger().info(f"Disregarding empty diff for file {_gef_filename(diffs[i])}")
                else:
                    get_logger().warning(f"Bitbucket failed to get diff for file {_gef_filename(diffs[i])}")
                    diff_split[i] = ""

        invalid_files_names = []
        diff_files = []
        counter_valid = 0  # number of valid files seen so far; drives the full-content cap below
        # get full files
        for index, diff in enumerate(diffs):
            file_path = _gef_filename(diff)
            if not is_valid_file(file_path):
                invalid_files_names.append(file_path)
                continue

            try:
                counter_valid += 1
                if get_settings().get("bitbucket_app.avoid_full_files", False):
                    # Full file contents disabled by configuration.
                    original_file_content_str = ""
                    new_file_content_str = ""
                elif counter_valid < MAX_FILES_ALLOWED_FULL // 2:  # factor 2 because bitbucket has limited API calls
                    # Each side is downloaded only when the diffstat entry carries links for it.
                    if diff.old.get_data("links"):
                        original_file_content_str = self._get_pr_file_content(
                            diff.old.get_data("links")['self']['href'])
                    else:
                        original_file_content_str = ""
                    if diff.new.get_data("links"):
                        new_file_content_str = self._get_pr_file_content(diff.new.get_data("links")['self']['href'])
                    else:
                        new_file_content_str = ""
                else:
                    # Cap reached: log once (on the first file past the cap) and skip downloads.
                    if counter_valid == MAX_FILES_ALLOWED_FULL // 2:
                        get_logger().info(
                            f"Bitbucket too many files in PR, will avoid loading full content for rest of files")
                    original_file_content_str = ""
                    new_file_content_str = ""
            except Exception as e:
                get_logger().exception(f"Error - bitbucket failed to get file content, error: {e}")
                original_file_content_str = ""
                new_file_content_str = ""

            # diff_split is indexed in parallel with diffs (lengths were checked equal above).
            file_patch_canonic_structure = FilePatchInfo(
                original_file_content_str,
                new_file_content_str,
                diff_split[index],
                file_path,
            )

            # Map the diffstat status onto EDIT_TYPE; any other status leaves
            # edit_type at whatever FilePatchInfo initialised it to.
            if diff.data['status'] == 'added':
                file_patch_canonic_structure.edit_type = EDIT_TYPE.ADDED
            elif diff.data['status'] == 'removed':
                file_patch_canonic_structure.edit_type = EDIT_TYPE.DELETED
            elif diff.data['status'] == 'modified':
                file_patch_canonic_structure.edit_type = EDIT_TYPE.MODIFIED
            elif diff.data['status'] == 'renamed':
                file_patch_canonic_structure.edit_type = EDIT_TYPE.RENAMED
            diff_files.append(file_patch_canonic_structure)

        if invalid_files_names:
            get_logger().info(f"Disregarding files with invalid extensions:\n{invalid_files_names}")

        self.diff_files = diff_files
        return diff_files

    def get_latest_commit_url(self):
        """Return the HTML link of the PR's source commit, read from the PR's raw data."""
        source_commit = self.pr.data['source']['commit']
        html_link = source_commit['links']['html']
        return html_link['href']

    def get_comment_url(self, comment):
        """Return the HTML link stored in the given comment's raw data."""
        html_link = comment.data['links']['html']
        return html_link['href']

    def publish_persistent_comment(self, pr_comment: str,
                                   initial_header: str,
                                   update_header: bool = True,
                                   name='review',
                                   final_update_message=True):
        """Update an existing comment in place, or publish a new one.

        The PR's comments are scanned for the first one whose raw text contains
        ``initial_header``. That comment is overwritten with ``pr_comment``
        (optionally with an "updated until commit" line injected after the header),
        and, if ``final_update_message`` is set, a short notice linking to it is
        published. When no comment matches, or anything fails along the way,
        ``pr_comment`` is published as a regular new comment.
        """
        try:
            for existing_comment in self.pr.comments():
                if initial_header not in existing_comment.raw:
                    continue

                commit_url = self.get_latest_commit_url()
                existing_url = self.get_comment_url(existing_comment)
                if update_header:
                    new_header = f"{initial_header}\n\n#### ({name.capitalize()} updated until commit {commit_url})\n"
                    refreshed_body = pr_comment.replace(initial_header, new_header)
                else:
                    refreshed_body = pr_comment
                get_logger().info(f"Persistent mode - updating comment {existing_url} to latest {name} message")
                payload = {"content": {"raw": refreshed_body}}
                existing_comment._update_data(existing_comment.put(None, data=payload))
                if final_update_message:
                    self.publish_comment(
                        f"**[Persistent {name}]({existing_url})** updated to latest commit {commit_url}")
                return
        except Exception as e:
            # Failure to update is not fatal: fall through and post a fresh comment.
            get_logger().exception(f"Failed to update persistent review, error: {e}")
        self.publish_comment(pr_comment)

    def publish_comment(self, pr_comment: str, is_temporary: bool = False):
        """Post a comment on the PR and return what the PR object's ``comment`` call returns.

        Temporary comments are skipped (returning ``None``) when the
        ``publish_output_progress`` setting is off; when posted, their ids are
        recorded in ``self.temp_comments``. The text is passed through
        ``limit_output_characters`` with ``self.max_comment_length`` before posting.
        """
        if is_temporary:
            if not get_settings().config.publish_output_progress:
                get_logger().debug(f"Skipping publish_comment for temporary comment: {pr_comment}")
                return None

        trimmed_text = self.limit_output_characters(pr_comment, self.max_comment_length)
        posted = self.pr.comment(trimmed_text)
        if is_temporary:
            self.temp_comments.append(posted["id"])
        return posted

    def edit_comment(self, comment, body: str):
        """Replace the text of ``comment`` with ``body`` (length-limited first).

        Any exception is logged and suppressed; nothing is returned.
        """
        try:
            limited_body = self.limit_output_characters(body, self.max_comment_length)
            comment.update(limited_body)
        except Exception as e:
            get_logger().exception(f"Failed to update comment, error: {e}")

    def remove_initial_comment(self):
        """Remove every comment recorded in ``self.temp_comments``.

        Any exception is logged and suppressed.
        """
        try:
            for temp_comment_id in self.temp_comments:
                self.remove_comment(temp_comment_id)
        except Exception as e:
            get_logger().exception(f"Failed to remove temp comments, error: {e}")

    def remove_comment(self, comment):
        """Delete the PR comment addressed by ``comment`` (interpolated into the "comments/<...>" path).

        Any exception is logged and suppressed.
        """
        try:
            endpoint = f"comments/{comment}"
            self.pr.delete(endpoint)
        except Exception as e:
            get_logger().exception(f"Failed to remove comment, error: {e}")

    # function to create_inline_comment
    def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str,
                              absolute_position: int = None):
        """Build the payload dict for an inline comment without publishing it.

        The target line is located with ``find_line_number_of_relevant_line_in_file``
        over the PR's diff files.

        Returns:
            ``{"body", "path", "position"}`` when the line was located, otherwise an
            empty dict.
        """
        body = self.limit_output_characters(body, self.max_comment_length)
        position, absolute_position = find_line_number_of_relevant_line_in_file(
            self.get_diff_files(),
            relevant_file.strip('`'),
            relevant_line_in_file,
            absolute_position,
        )
        if position == -1:
            # Line not found in the diff: nothing to attach the comment to.
            if get_settings().config.verbosity_level >= 2:
                get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}")
            return {}
        return dict(body=body, path=relevant_file.strip(), position=absolute_position)

    def publish_inline_comment(self, comment: str, from_line: int, file: str, original_suggestion=None):
        """POST a single inline comment to ``self.bitbucket_comment_api_url``.

        Args:
            comment: Comment text; length-limited before sending.
            from_line: Line number sent as the inline "to" field.
            file: Path sent as the inline "path" field.
            original_suggestion: Unused.

        Returns:
            The response object returned by ``requests.request``.
        """
        comment = self.limit_output_characters(comment, self.max_comment_length)
        inline_target = {"to": from_line, "path": file}
        payload = json.dumps({"content": {"raw": comment}, "inline": inline_target})
        return requests.request(
            "POST", self.bitbucket_comment_api_url, data=payload, headers=self.headers
        )

    def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str:
        """Build a PR link anchored at a file, or at a line within it.

        ``relevant_line_start == -1`` yields a file-level anchor; any other value
        is appended as ``T<line>``. ``relevant_line_end`` is not used.
        """
        anchor = f"{self.pr_url}/#L{relevant_file}"
        if relevant_line_start != -1:
            anchor += f"T{relevant_line_start}"
        return anchor

    def generate_link_to_relevant_line_number(self, suggestion) -> str:
        """Return a PR link to the line named by a suggestion, or "" when unavailable.

        ``suggestion`` must provide 'relevant_file' and 'relevant_line'. An empty
        link is returned when the line text is blank, the line cannot be located,
        ``self.pr_url`` is falsy, or any exception occurs.
        """
        try:
            file_name = suggestion['relevant_file'].strip('`').strip("'").rstrip()
            line_text = suggestion['relevant_line'].rstrip()
            if not line_text:
                return ""

            _, absolute_position = find_line_number_of_relevant_line_in_file(
                self.get_diff_files(), file_name, line_text)

            if absolute_position != -1 and self.pr_url:
                return f"{self.pr_url}/#L{file_name}T{absolute_position}"
        except Exception as e:
            if get_settings().config.verbosity_level >= 2:
                get_logger().info(f"Failed adding line link, error: {e}")

        return ""

    def publish_inline_comments(self, comments: list[dict]):
        """Publish each comment dict through ``publish_inline_comment``.

        The target line is taken from the first key present among 'position',
        'start_line' (multi-line comment) and 'line' (single-line comment).
        Entries with none of these keys are logged as errors and skipped.
        """
        # note that bitbucket does not seem to support range - only a comment on a single line - https://community.developer.atlassian.com/t/api-post-endpoint-for-inline-pull-request-comments/60452
        line_keys = ('position', 'start_line', 'line')
        for entry in comments:
            target_key = next((key for key in line_keys if key in entry), None)
            if target_key is None:
                get_logger().error(f"Could not publish inline comment {entry}")
                continue
            self.publish_inline_comment(entry['body'], entry[target_key], entry['path'])

    def get_title(self):
        """Return the title of the current pull request."""
        pull_request = self.pr
        return pull_request.title

    def get_languages(self):
        """Return a one-entry dict mapping the repository's "language" field to 0."""
        repo_language = self._get_repo().get_data("language")
        return {repo_language: 0}

    def get_pr_branch(self):
        """Return the source branch of the current pull request."""
        pull_request = self.pr
        return pull_request.source_branch

    # This function attempts to get the default branch of the repository. As a fallback, uses the PR destination branch.
    # Note: Must be running from a PR context.
    def get_repo_default_branch(self):
        """Return the repository's main branch name, falling back to the PR destination branch.

        The repository resource is fetched from the Bitbucket Cloud REST API and
        its ``mainbranch.name`` field is returned. If the request, the JSON
        decoding or the field lookup fails, ``self.pr.destination_branch`` is
        returned instead, so a PR must already be set.

        Only ``Exception`` subclasses trigger the fallback; KeyboardInterrupt and
        SystemExit are allowed to propagate.
        """
        try:
            url_repo = f"https://api.bitbucket.org/2.0/repositories/{self.workspace_slug}/{self.repo_slug}/"
            response_repo = requests.request("GET", url_repo, headers=self.headers).json()
            return response_repo['mainbranch']['name']
        except Exception:
            return self.pr.destination_branch

    def get_pr_owner_id(self) -> str | None:
        return self.workspace_slug

    def get_pr_description_full(self):
        """Return the description of the current pull request."""
        pull_request = self.pr
        return pull_request.description

    def get_user_id(self):
        """Return a constant placeholder user id (always 0)."""
        placeholder_user_id = 0
        return placeholder_user_id

    def get_issue_comments(self):
        """Not available on this provider.

        Raises:
            NotImplementedError: always.
        """
        message = "Bitbucket provider does not support issue comments yet"
        raise NotImplementedError(message)

    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:
        """No-op: nothing is sent; both arguments are ignored and ``True`` is always returned."""
        reaction_result = True
        return reaction_result

    def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool:
        """No-op: nothing is removed; both arguments are ignored and ``True`` is always returned."""
        removal_result = True
        return removal_result

    @staticmethod
    def _parse_pr_url(pr_url: str) -> Tuple[str, int, int]:
        parsed_url = urlparse(pr_url)

        if "bitbucket.org" not in parsed_url.netloc:
            raise ValueError("The provided URL is not a valid Bitbucket URL")

        path_parts = parsed_url.path.strip("/").split("/")

        if len(path_parts) < 4 or path_parts[2] != "pull-requests":
            raise ValueError(
                "The provided URL does not appear to be a Bitbucket PR URL"
            )

        workspace_slug = path_parts[0]
        repo_slug = path_parts[1]
        try:
            pr_number = int(path_parts[3])
        except ValueError as e:
            raise ValueError("Unable to convert PR number to integer") from e

        return workspace_slug, repo_slug, pr_number

    def _get_repo(self):
        if self.repo is None:
            self.repo = self.bitbucket_client.workspaces.get(
                self.workspace_slug
            ).repositories.get(self.repo_slug)
        return self.repo

    def _get_pr(self):
        return self._get_repo().pullrequests.get(self.pr_num)

    def get_pr_file_content(self, file_path: str, branch: str) -> str:
        """Download a file's text from the repository's ``src`` endpoint.

        When ``branch`` is the PR's source or destination branch, the
        corresponding commit hash from the PR data is used as the ref instead of
        the branch name.

        Returns:
            The response text, or "" on a 404 response or on any exception.
        """
        try:
            if branch == self.pr.source_branch:
                ref = self.pr.data["source"]["commit"]["hash"]
            elif branch == self.pr.destination_branch:
                ref = self.pr.data["destination"]["commit"]["hash"]
            else:
                ref = branch
            url = (f"https://api.bitbucket.org/2.0/repositories/{self.workspace_slug}/{self.repo_slug}/src/"
                   f"{ref}/{file_path}")
            response = requests.request("GET", url, headers=self.headers)
            return "" if response.status_code == 404 else response.text
        except Exception:
            return ""

    def create_or_update_pr_file(self, file_path: str, branch: str, contents="", message="") -> None:
        """POST a file to the repository's ``src`` endpoint, creating a commit on ``branch``.

        Args:
            file_path: Path of the file inside the repository.
            branch: Branch to commit to.
            contents: New file contents.
            message: Commit message; when empty it defaults to "Update <path>" if
                ``contents`` is non-empty and "Create <path>" otherwise.

        Only the Authorization header (when present in ``self.headers``) is
        forwarded. Exceptions raised by the request are logged and suppressed; the
        response is not inspected.
        """
        url = (f"https://api.bitbucket.org/2.0/repositories/{self.workspace_slug}/{self.repo_slug}/src/")
        if not message:
            message = f"Update {file_path}" if contents else f"Create {file_path}"

        auth_headers = {}
        if 'Authorization' in self.headers:
            auth_headers['Authorization'] = self.headers['Authorization']

        form_fields = {"message": message, "branch": branch}
        uploaded_files = {file_path: contents}
        try:
            requests.request("POST", url, headers=auth_headers, data=form_fields, files=uploaded_files)
        except Exception:
            get_logger().exception(f"Failed to create empty file {file_path} in branch {branch}")

    def _get_pr_file_content(self, remote_link: str):
        try:
            response = requests.request("GET", remote_link, headers=self.headers)
            if response.status_code == 404:  # not found
                return ""
            contents = response.text
            return contents
        except Exception:
            return ""

    def get_commit_messages(self):
        """Return an empty string; commit messages are not implemented for this provider yet."""
        no_messages = ""
        return no_messages  # not implemented yet

    # update the PR title and description with a PUT to the pull-request API URL
    def publish_description(self, pr_title: str, description: str):
        """Update the PR's title and description with a PUT to the pull-request API URL.

        Args:
            pr_title: New title.
            description: New description.

        Returns:
            The response object from ``requests.request``. A non-200 status is
            logged but not raised.
        """
        payload = json.dumps({
            "description": description,
            "title": pr_title

        })

        response = requests.request("PUT", self.bitbucket_pull_request_api_url, headers=self.headers, data=payload)
        try:
            if response.status_code != 200:
                get_logger().info(f"Failed to update description, error code: {response.status_code}")
        except Exception:
            # Status reporting is best-effort, but KeyboardInterrupt/SystemExit must
            # not be swallowed the way a bare ``except:`` would.
            pass
        return response

    # bitbucket does not support labels
    def publish_labels(self, pr_types: list):
        """No-op: labels are not supported, so ``pr_types`` is ignored and ``None`` is returned."""
        return None

    # bitbucket does not support labels
    def get_pr_labels(self, update=False):
        """No-op: labels are not supported, so ``update`` is ignored and ``None`` is returned."""
        return None
    #Clone related
    def _prepare_clone_url_with_token(self, repo_url_to_clone: str) -> str | None:
        if "bitbucket.org" not in repo_url_to_clone:
            get_logger().error("Repo URL is not a valid bitbucket URL.")
            return None

        (scheme, base_url) = repo_url_to_clone.split("bitbucket.org")
        if not all([scheme, base_url]):
            get_logger().error(f"repo_url_to_clone: {repo_url_to_clone} is not a valid bitbucket URL.")
            return None

        if self.auth_type == "basic":
            # Basic auth with token
            clone_url = f"{scheme}x-token-auth:{self.basic_token}@bitbucket.org{base_url}"
        elif self.auth_type == "bearer":
            # Bearer token
            clone_url = f"{scheme}x-token-auth:{self.bearer_token}@bitbucket.org{base_url}"
        else:
            # This case should ideally not be reached if __init__ validates auth_type
            get_logger().error(f"Unsupported or uninitialized auth_type: {getattr(self, 'auth_type', 'N/A')}. Returning None")
            return None

        return clone_url


================================================
FILE: pr_agent/git_providers/bitbucket_server_provider.py
================================================
import difflib
import re

from packaging.version import parse as parse_version
from typing import Optional, Tuple
from urllib.parse import quote_plus, urlparse

from atlassian.bitbucket import Bitbucket
from requests.exceptions import HTTPError
import shlex
import subprocess

from ..algo.file_filter import filter_ignored
from ..algo.git_patch_processing import decode_if_bytes
from ..algo.language_handler import is_valid_file
from ..algo.types import EDIT_TYPE, FilePatchInfo
from ..algo.utils import (find_line_number_of_relevant_line_in_file,
                          load_large_diff)
from ..config_loader import get_settings
from ..log import get_logger
from .git_provider import GitProvider, get_git_ssl_env


class BitbucketServerProvider(GitProvider):
    def __init__(
            self, pr_url: Optional[str] = None, incremental: Optional[bool] = False,
            bitbucket_client: Optional[Bitbucket] = None,
    ):
        """Set up the provider and, when ``pr_url`` is given, bind it to that PR.

        Args:
            pr_url: Pull-request URL; also used to derive the server base URL.
            incremental: Stored on the instance as-is.
            bitbucket_client: Pre-built client. When omitted, one is created from
                the BITBUCKET_SERVER settings, preferring the bearer token over
                username/password.

        Raises:
            ValueError: if no client is supplied and no server URL can be parsed
                from ``pr_url``.
        """
        # Plain state; the PR-specific fields are filled in by set_pr().
        self.bitbucket_server_url = None
        self.workspace_slug = None
        self.repo_slug = None
        self.repo = None
        self.pr_num = None
        self.pr = None
        self.pr_url = pr_url
        self.temp_comments = []
        self.incremental = incremental
        self.diff_files = None
        self.bitbucket_pull_request_api_url = pr_url

        # Credentials from settings
        self.bearer_token = get_settings().get("BITBUCKET_SERVER.BEARER_TOKEN", None)
        username = get_settings().get("BITBUCKET_SERVER.USERNAME", None)
        password = get_settings().get("BITBUCKET_SERVER.PASSWORD", None)

        if not bitbucket_client:
            self.bitbucket_server_url = self._parse_bitbucket_server(pr_url)
            if not self.bitbucket_server_url:
                raise ValueError("Invalid or missing Bitbucket Server URL parsed from PR URL.")

            if self.bearer_token:  # bearer token takes precedence
                credentials = {"token": self.bearer_token}
            else:  # otherwise use username and password
                credentials = {"username": username, "password": password}
            self.bitbucket_client = Bitbucket(url=self.bitbucket_server_url, **credentials)
        else:
            # Caller-supplied client: use its URL when it has one, else parse the PR URL.
            self.bitbucket_client = bitbucket_client
            self.bitbucket_server_url = getattr(bitbucket_client, 'url', None) or self._parse_bitbucket_server(pr_url)

        # The server version selects the diff strategy later on; None means "unknown".
        try:
            app_properties = self.bitbucket_client.get("rest/api/1.0/application-properties")
            self.bitbucket_api_version = parse_version(app_properties.get('version'))
        except Exception:
            self.bitbucket_api_version = None

        if pr_url:
            self.set_pr(pr_url)

    def get_git_repo_url(self, pr_url: str=None) -> str:
        """Return the ``/scm/<workspace>/<repo>.git`` clone URL for the current PR's repository.

        ``pr_url`` is ignored (bitbucket server does not support issue url); the
        scheme and host come from ``self.pr_url`` and both slugs are lower-cased.
        Returns "" after logging if anything fails.
        """
        try:
            url_parts = urlparse(self.pr_url)
            project_key = self.workspace_slug.lower()
            repo_name = self.repo_slug.lower()
            return f"{url_parts.scheme}://{url_parts.netloc}/scm/{project_key}/{repo_name}.git"
        except Exception:
            get_logger().exception(f"url is not a valid merge requests url: {self.pr_url}")
            return ""

    # Given a git repo url, return prefix and suffix of the provider in order to view a given file belonging to that repo.
    # Example: https://bitbucket.dev.my_inc.com/scm/my_work/my_repo.git and branch: my_branch -> prefix: "https://bitbucket.dev.my_inc.com/projects/my_work/repos/my_repo/browse", suffix: "?at=refs%2Fheads%2Fmy_branch"
    # In case git url is not provided, provider will use PR context (which includes branch) to determine the prefix and suffix.
    def get_canonical_url_parts(self, repo_git_url:str=None, desired_branch:str=None) -> Tuple[str, str]:
        """Return the (prefix, suffix) pair used to build "browse" links to repository files.

        Args:
            repo_git_url: Clone URL of the form ".../scm/<workspace>/<repo>.git".
                When omitted, the workspace/repo of the current PR context are used
                and ``desired_branch`` is replaced by the repository default branch.
            desired_branch: Branch name placed in the suffix.

        Returns:
            ``(prefix, suffix)``, or ``("", "")`` after logging an error when the
            default branch or the workspace/repo names cannot be determined.
        """
        workspace_name = project_name = None
        if repo_git_url:
            if '.git' in repo_git_url and 'scm/' in repo_git_url:
                repo_path = repo_git_url.split('.git')[0].split('scm/')[-1]
                path_segments = repo_path.split('/')
                if len(path_segments) == 2:  # Has to have the form <workspace>/<repo>
                    workspace_name, project_name = path_segments
        else:
            workspace_name = self.workspace_slug
            project_name = self.repo_slug
            default_branch_dict = self.bitbucket_client.get_default_branch(workspace_name, project_name)
            if 'displayId' not in default_branch_dict:
                get_logger().error(f"Cannot obtain default branch for workspace_name={workspace_name}, "
                                   f"project_name={project_name}, default_branch_dict={default_branch_dict}")
                return ("", "")
            desired_branch = default_branch_dict['displayId']

        if not workspace_name or not project_name:
            get_logger().error(f"workspace_name or project_name not found in context, either git url: {repo_git_url} or uninitialized workspace/project.")
            return ("", "")

        browse_prefix = f"{self.bitbucket_server_url}/projects/{workspace_name}/repos/{project_name}/browse"
        branch_suffix = f"?at=refs%2Fheads%2F{desired_branch}"
        return (browse_prefix, branch_suffix)

    def get_repo_settings(self):
        """Return the content of the repository's ``.pr_agent.toml`` file.

        Returns "" when the file cannot be fetched; a 404 HTTPError is treated as
        "no settings file" and is not logged, any other failure is logged as an error.
        """
        try:
            return self.bitbucket_client.get_content_of_file(self.workspace_slug, self.repo_slug, ".pr_agent.toml")
        except Exception as e:
            file_is_missing = isinstance(e, HTTPError) and e.response.status_code == 404
            if not file_is_missing:
                get_logger().error(f"Failed to load .pr_agent.toml file, error: {e}")
            return ""

    def get_pr_id(self):
        """Return the number of the current pull request."""
        pr_number = self.pr_num
        return pr_number

    def publish_code_suggestions(self, code_suggestions: list) -> bool:
        """
        Publishes code suggestions as inline comments on the PR.

        Each suggestion dict must provide "body", "relevant_file",
        "relevant_lines_start" and "relevant_lines_end". When it also carries an
        'original_suggestion', the ```suggestion block in the body is replaced by a
        unified diff of its existing vs. improved code. Suggestions with a missing
        or -1 start line, or with end < start, are skipped with a warning.

        Returns True when publishing the collected comments did not raise, False otherwise.
        """
        inline_comments = []
        for item in code_suggestions:
            body = item["body"]
            source_suggestion = item.get('original_suggestion', None)  # needed for diff code
            if source_suggestion:
                try:
                    code_before = source_suggestion['existing_code'].rstrip() + "\n"
                    code_after = source_suggestion['improved_code'].rstrip() + "\n"
                    diff_lines = difflib.unified_diff(code_before.split('\n'),
                                                      code_after.split('\n'), n=999)
                    full_patch = "\n".join(diff_lines)
                    # drop the first five lines of the generated diff text (its header part)
                    patch = "\n".join(full_patch.splitlines()[5:]).strip('\n')
                    diff_code = f"\n\n```diff\n{patch.rstrip()}\n```"
                    # replace ```suggestion ... ``` with diff_code, using regex:
                    body = re.sub(r'```suggestion.*?```', diff_code, body, flags=re.DOTALL)
                except Exception as e:
                    get_logger().exception(f"Bitbucket failed to get diff code for publishing, error: {e}")
                    continue

            target_file = item["relevant_file"]
            first_line = item["relevant_lines_start"]
            last_line = item["relevant_lines_end"]

            if not first_line or first_line == -1:
                get_logger().warning(
                    f"Failed to publish code suggestion, relevant_lines_start is {first_line}"
                )
                continue

            if last_line < first_line:
                get_logger().warning(
                    f"Failed to publish code suggestion, "
                    f"relevant_lines_end is {last_line} and "
                    f"relevant_lines_start is {first_line}"
                )
                continue

            if last_line > first_line:
                # Bitbucket does not support multi-line suggestions so use a code block instead - https://jira.atlassian.com/browse/BSERV-4553
                body = body.replace("```suggestion", "```")
                comment_params = {"body": body, "path": target_file, "line": last_line}
                comment_params["start_line"] = first_line
                comment_params["start_side"] = "RIGHT"
            else:  # API is different for single line comments
                comment_params = {"body": body, "path": target_file, "line": first_line}
                comment_params["side"] = "RIGHT"
            inline_comments.append(comment_params)

        try:
            self.publish_inline_comments(inline_comments)
        except Exception as e:
            if get_settings().config.verbosity_level >= 2:
                get_logger().error(f"Failed to publish code suggestion, error: {e}")
            return False
        return True

    def publish_file_comments(self, file_comments: list) -> bool:
        """Placeholder: file-level comments are not published by this provider.

        ``file_comments`` is ignored and ``None`` is returned (``is_supported``
        reports 'publish_file_comments' as unsupported).
        """
        return None

    def is_supported(self, capability: str) -> bool:
        """Tell whether the named capability is available on this provider.

        Returns False for the capabilities listed below, True for anything else.
        """
        unsupported_capabilities = (
            'get_issue_comments',
            'get_labels',
            'gfm_markdown',
            'publish_file_comments',
        )
        return capability not in unsupported_capabilities

    def set_pr(self, pr_url: str):
        """Point this provider at the pull request identified by ``pr_url``.

        The URL is split into workspace slug, repository slug and PR number, which
        are stored on the instance before the PR object itself is fetched.
        """
        workspace, repository, number = self._parse_pr_url(pr_url)
        self.workspace_slug = workspace
        self.repo_slug = repository
        self.pr_num = number
        # _get_pr() is called only after the identifiers above are in place.
        self.pr = self._get_pr()

    def get_file(self, path: str, commit_id: str):
        """Return the content of ``path`` at ``commit_id``.

        An HTTPError from the client is treated as "file not found": it is logged
        at debug level and "" is returned. Other exceptions propagate.
        """
        try:
            return self.bitbucket_client.get_content_of_file(self.workspace_slug,
                                                             self.repo_slug,
                                                             path,
                                                             commit_id)
        except HTTPError:
            get_logger().debug(f"File {path} not found at commit id: {commit_id}")
            return ""

    def get_files(self):
        """Return the ``path.toString`` value of every change reported for the PR."""
        changed_paths = []
        for change in self.bitbucket_client.get_pull_requests_changes(
                self.workspace_slug, self.repo_slug, self.pr_num):
            changed_paths.append(change["path"]['toString'])
        return changed_paths

    #gets the best common ancestor: https://git-scm.com/docs/git-merge-base
    @staticmethod
    def get_best_common_ancestor(source_commits_list, destination_commits_list, guaranteed_common_ancestor) -> str:
        destination_commit_hashes = {commit['id'] for commit in destination_commits_list} | {guaranteed_common_ancestor}

        for commit in source_commits_list:
            for parent_commit in commit['parents']:
                if parent_commit['id'] in destination_commit_hashes:
                    return parent_commit['id']

        return guaranteed_common_ancestor

    def get_diff_files(self) -> list[FilePatchInfo]:
        if self.diff_files:
            return self.diff_files

        head_sha = self.pr.fromRef['latestCommit']

        # if Bitbucket api version is >= 8.16 then use the merge-base api for 2-way diff calculation
        if self.bitbucket_api_version is not None and self.bitbucket_api_version >= parse_version("8.16"):
            try:
                base_sha = self.bitbucket_client.get(self._get_merge_base())['id']
            except Exception as e:
                get_logger().error(f"Failed to get the best common ancestor for PR: {self.pr_url}, \nerror: {e}")
                raise e
        else:
            source_commits_list = list(self.bitbucket_client.get_pull_requests_commits(
                self.workspace_slug,
                self.repo_slug,
                self.pr_num
            ))
            # if Bitbucket api version is None or < 7.0 then do a simple diff with a guaranteed common ancestor
            base_sha = source_commits_list[-1]['parents'][0]['id']
            # if Bitbucket api version is 7.0-8.15 then use 2-way diff functionality for the base_sha
            if self.bitbucket_api_version is not None and self.bitbucket_api_version >= parse_version("7.0"):
                try:
                    destination_commits = list(
                        self.bitbucket_client.get_commits(self.workspace_slug, self.repo_slug, base_sha,
                                                          self.pr.toRef['latestCommit']))
                    base_sha = self.get_best_common_ancestor(source_commits_list, destination_commits, base_sha)
                except Exception as e:
                    get_logger().error(
                        f"Failed to get the commit list for calculating best common ancestor for PR: {self.pr_url}, \nerror: {e}")
                    raise e

        diff_files = []
        original_file_content_str = ""
        new_file_content_str = ""

        changes_original = list(self.bitbucket_client.get_pull_requests_changes(self.workspace_slug, self.repo_slug, self.pr_num))
        changes = filter_ignored(changes_original, 'bitbucket_server')
        for change in changes:
            file_path = change['path']['toString']
            if not is_valid_file(file_path.split("/")[-1]):
                get_logger().info(f"Skipping a non-code file: {file_path}")
                continue

            match change['type']:
                case 'ADD':
                    edit_type = EDIT_TYPE.ADDED
                    new_file_content_str = self.get_file(file_path, head_sha)
                    new_file_content_str = decode_if_bytes(new_file_content_str)
                    original_file_content_str = ""
                case 'DELETE':
                    edit_type = EDIT_TYPE.DELETED
                    new_file_content_str = ""
                    original_file_content_str = self.get_file(file_path, base_sha)
                    original_file_content_str = decode_if_bytes(original_file_content_str)
                case 'RENAME':
                    edit_type = EDIT_TYPE.RENAMED
                case _:
                    edit_type = EDIT_TYPE.MODIFIED
                    original_file_content_str = self.get_file(file_path, base_sha)
                    original_file_content_str = decode_if_bytes(original_file_content_str)
                    new_file_content_str = self.get_file(file_path, head_sha)
                    new_file_content_str = decode_if_bytes(new_file_content_str)

            patch = load_large_diff(file_path, new_file_content_str, original_file_content_str, show_warning=False)

            diff_files.append(
                FilePatchInfo(
                    original_file_content_str,
                    new_file_content_str,
                    patch,
                    file_path,
                    edit_type=edit_type,
                )
            )

        self.diff_files = diff_files
        return diff_files

    def publish_comment(self, pr_comment: str, is_temporary: bool = False):
        if not is_temporary:
            self.bitbucket_client.add_pull_request_comment(self.workspace_slug, self.repo_slug, self.pr_num, pr_comment)

    def remove_initial_comment(self):
        try:
            for comment in self.temp_comments:
                self.remove_comment(comment)
        except ValueError as e:
            get_logger().exception(f"Failed to remove temp comments, error: {e}")

    def remove_comment(self, comment):
        pass

    # function to create_inline_comment
    def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str,
                              absolute_position: int = None):
        """
        Build the dictionary describing an inline comment, without publishing it.

        Args:
            body: text of the comment.
            relevant_file: path of the file the comment refers to; surrounding whitespace and
                backticks are removed before it is used.
            relevant_line_in_file: content of the line the comment refers to.
            absolute_position: optional line number, forwarded to the line lookup.

        Returns:
            dict: `{"body", "path", "position"}` when the line was located in the diff,
            otherwise an empty dict.
        """
        # Normalize the name once so that the diff lookup and the returned path always agree
        # (previously the lookup stripped backticks while the returned path kept them).
        file_path = relevant_file.strip().strip('`')

        position, absolute_position = find_line_number_of_relevant_line_in_file(
            self.get_diff_files(),
            file_path,
            relevant_line_in_file,
            absolute_position
        )
        if position == -1:
            if get_settings().config.verbosity_level >= 2:
                get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}")
            return {}
        return dict(body=body, path=file_path, position=absolute_position)

    def publish_inline_comment(self, comment: str, from_line: int, file: str, original_suggestion=None):
        payload = {
            "text": comment,
            "severity": "NORMAL",
            "anchor": {
                "diffType": "EFFECTIVE",
                "path": file,
                "lineType": "ADDED",
                "line": from_line,
                "fileType": "TO"
            }
        }

        try:
            self.bitbucket_client.post(self._get_pr_comments_path(), data=payload)
        except Exception as e:
            get_logger().error(f"Failed to publish inline comment to '{file}' at line {from_line}, error: {e}")
            raise e

    def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str:
        if relevant_line_start == -1:
            link = f"{self.pr_url}/diff#{quote_plus(relevant_file)}"
        else:
            link = f"{self.pr_url}/diff#{quote_plus(relevant_file)}?t={relevant_line_start}"
        return link

    def generate_link_to_relevant_line_number(self, suggestion) -> str:
        try:
            relevant_file = suggestion['relevant_file'].strip('`').strip("'").rstrip()
            relevant_line_str = suggestion['relevant_line'].rstrip()
            if not relevant_line_str:
                return ""

            diff_files = self.get_diff_files()
            position, absolute_position = find_line_number_of_relevant_line_in_file \
                (diff_files, relevant_file, relevant_line_str)

            if absolute_position != -1:
                if self.pr:
                    link = f"{self.pr_url}/diff#{quote_plus(relevant_file)}?t={absolute_position}"
                    return link
                else:
                    if get_settings().config.verbosity_level >= 2:
                        get_logger().info(f"Failed adding line link to '{relevant_file}' since PR not set")
            else:
                if get_settings().config.verbosity_level >= 2:
                    get_logger().info(f"Failed adding line link to '{relevant_file}' since position not found")

            if absolute_position != -1 and self.pr_url:
                link = f"{self.pr_url}/diff#{quote_plus(relevant_file)}?t={absolute_position}"
                return link
        except Exception as e:
            if get_settings().config.verbosity_level >= 2:
                get_logger().info(f"Failed adding line link to '{relevant_file}', error: {e}")

        return ""

    def publish_inline_comments(self, comments: list[dict]):
        for comment in comments:
            if 'position' in comment:
                self.publish_inline_comment(comment['body'], comment['position'], comment['path'])
            elif 'start_line' in comment: # multi-line comment
                # note that bitbucket does not seem to support range - only a comment on a single line - https://community.developer.atlassian.com/t/api-post-endpoint-for-inline-pull-request-comments/60452
                self.publish_inline_comment(comment['body'], comment['start_line'], comment['path'])
            elif 'line' in comment: # single-line comment
                self.publish_inline_comment(comment['body'], comment['line'], comment['path'])
            else:
                get_logger().error(f"Could not publish inline comment: {comment}")

    def get_title(self):
        return self.pr.title

    def get_languages(self):
        return {"yaml": 0}  # devops LOL

    def get_pr_branch(self):
        return self.pr.fromRef['displayId']

    def get_pr_owner_id(self) -> str | None:
        return self.workspace_slug

    def get_pr_description_full(self):
        if hasattr(self.pr, "description"):
            return self.pr.description
        else:
            return None

    def get_user_id(self):
        return 0

    def get_issue_comments(self):
        raise NotImplementedError(
            "Bitbucket provider does not support issue comments yet"
        )

    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:
        return True

    def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool:
        return True

    @staticmethod
    def _parse_bitbucket_server(url: str) -> str:
        # pr url format: f"{bitbucket_server}/projects/{project_name}/repos/{repository_name}/pull-requests/{pr_id}"
        parsed_url = urlparse(url)
        server_path = parsed_url.path.split("/projects/")
        if len(server_path) > 1:
            server_path = server_path[0].strip("/")
            return f"{parsed_url.scheme}://{parsed_url.netloc}/{server_path}".strip("/")
        return f"{parsed_url.scheme}://{parsed_url.netloc}"

    @staticmethod
    def _parse_pr_url(pr_url: str) -> Tuple[str, str, int]:
        # pr url format: f"{bitbucket_server}/projects/{project_name}/repos/{repository_name}/pull-requests/{pr_id}"
        parsed_url = urlparse(pr_url)

        path_parts = parsed_url.path.strip("/").split("/")

        try:
            projects_index = path_parts.index("projects")
        except ValueError:
            projects_index = -1

        try:
            users_index = path_parts.index("users")
        except ValueError:
            users_index = -1

        if projects_index == -1 and users_index == -1:
            raise ValueError(f"The provided URL '{pr_url}' does not appear to be a Bitbucket PR URL")

        if projects_index != -1:
            path_parts = path_parts[projects_index:]
        else:
            path_parts = path_parts[users_index:]

        if len(path_parts) < 6 or path_parts[2] != "repos" or path_parts[4] != "pull-requests":
            raise ValueError(
                f"The provided URL '{pr_url}' does not appear to be a Bitbucket PR URL"
            )

        workspace_slug = path_parts[1]
        if users_index != -1:
            workspace_slug = f"~{workspace_slug}"
        repo_slug = path_parts[3]
        try:
            pr_number = int(path_parts[5])
        except ValueError as e:
            raise ValueError(f"Unable to convert PR number '{path_parts[5]}' to integer") from e

        return workspace_slug, repo_slug, pr_number

    def _get_repo(self):
        if self.repo is None:
            self.repo = self.bitbucket_client.get_repo(self.workspace_slug, self.repo_slug)
        return self.repo

    def _get_pr(self):
        try:
            pr = self.bitbucket_client.get_pull_request(self.workspace_slug, self.repo_slug,
                                                        pull_request_id=self.pr_num)
            return type('new_dict', (object,), pr)
        except Exception as e:
            get_logger().error(f"Failed to get pull request, error: {e}")
            raise e

    def _get_pr_file_content(self, remote_link: str):
        return ""

    def get_commit_messages(self):
        return ""

    # update the title and description of the pull request
    def publish_description(self, pr_title: str, description: str):
        payload = {
            "version": self.pr.version,
            "description": description,
            "title": pr_title,
            "reviewers": self.pr.reviewers  # needs to be sent otherwise gets wiped
        }
        try:
            self.bitbucket_client.update_pull_request(self.workspace_slug, self.repo_slug, str(self.pr_num), payload)
        except Exception as e:
            get_logger().error(f"Failed to update pull request, error: {e}")
            raise e

    # bitbucket does not support labels
    def publish_labels(self, pr_types: list):
        pass

    # bitbucket does not support labels
    def get_pr_labels(self, update=False):
        pass

    def _get_pr_comments_path(self):
        return f"rest/api/latest/projects/{self.workspace_slug}/repos/{self.repo_slug}/pull-requests/{self.pr_num}/comments"

    def _get_merge_base(self):
        return f"rest/api/latest/projects/{self.workspace_slug}/repos/{self.repo_slug}/pull-requests/{self.pr_num}/merge-base"
    # Clone related
    def _prepare_clone_url_with_token(self, repo_url_to_clone: str) -> str | None:
        if 'bitbucket.' not in repo_url_to_clone:
            get_logger().error("Repo URL is not a valid bitbucket URL.")
            return None
        bearer_token = self.bearer_token
        if not bearer_token:
            get_logger().error("No bearer token provided. Returning None")
            return None
        # Return unmodified URL as the token is passed via HTTP headers in _clone_inner, as seen below.
        return repo_url_to_clone

    #Overriding the shell command, since for some reason usage of x-token-auth doesn't work, as mentioned here:
    # https://stackoverflow.com/questions/56760396/cloning-bitbucket-server-repo-with-access-tokens
    def _clone_inner(self, repo_url: str, dest_folder: str, operation_timeout_in_seconds: int=None):
        bearer_token = self.bearer_token
        if not bearer_token:
            #Shouldn't happen since this is checked in _prepare_clone, therefore - throwing an exception.
            raise RuntimeError(f"Bearer token is required!")

        cli_args = shlex.split(f"git clone -c http.extraHeader='Authorization: Bearer {bearer_token}' "
                               f"--filter=blob:none --depth 1 {repo_url} {dest_folder}")

        ssl_env = get_git_ssl_env()

        subprocess.run(cli_args, env=ssl_env, check=True,  # check=True will raise an exception if the command fails
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=operation_timeout_in_seconds)


================================================
FILE: pr_agent/git_providers/codecommit_client.py
================================================
import boto3
import botocore


class CodeCommitDifferencesResponse:
    """
    Value object for one entry of the differences list produced by get_differences().

    The "beforeBlob" / "afterBlob" sub-dictionaries and the "changeType" field of the JSON entry
    are flattened into attributes; any missing field defaults to an empty string.
    """

    def __init__(self, json: dict):
        before, after = json.get("beforeBlob", {}), json.get("afterBlob", {})

        self.before_blob_id, self.before_blob_path = before.get("blobId", ""), before.get("path", "")
        self.after_blob_id, self.after_blob_path = after.get("blobId", ""), after.get("path", "")
        self.change_type = json.get("changeType", "")


class CodeCommitPullRequestResponse:
    """
    Value object for the pull request JSON handled by get_pr().

    Exposes the title, the description and one CodeCommitPullRequestTarget per entry of
    "pullRequestTargets"; missing fields default to "" (or an empty list of targets).
    """

    class CodeCommitPullRequestTarget:
        """
        Nested class of CodeCommitPullRequestResponse holding the source/destination
        commit and reference of a single pull request target.
        """

        def __init__(self, json: dict):
            self.source_commit, self.source_branch = json.get("sourceCommit", ""), json.get("sourceReference", "")
            self.destination_commit = json.get("destinationCommit", "")
            self.destination_branch = json.get("destinationReference", "")

    def __init__(self, json: dict):
        self.title = json.get("title", "")
        self.description = json.get("description", "")

        target_cls = CodeCommitPullRequestResponse.CodeCommitPullRequestTarget
        self.targets = [target_cls(raw_target) for raw_target in json.get("pullRequestTargets", [])]


class CodeCommitClient:
    """
    CodeCommitClient is a wrapper around the AWS boto3 SDK for the CodeCommit client.

    The boto3 client is created on first use. Every failure is reported to the caller
    as a ValueError chained to the underlying exception.
    """

    def __init__(self):
        # Created on demand by _connect_boto_client()
        self.boto_client = None

    def is_supported(self, capability: str) -> bool:
        """Return False for the "gfm_markdown" capability and True for anything else."""
        if capability in ["gfm_markdown"]:
            return False
        return True

    def _connect_boto_client(self):
        """
        Create the boto3 CodeCommit client and store it on `self.boto_client`.

        Raises:
        - ValueError: if the client cannot be created
        """
        try:
            self.boto_client = boto3.client("codecommit")
        except Exception as e:
            raise ValueError(f"Failed to connect to AWS CodeCommit: {e}") from e

    def get_differences(self, repo_name: str, destination_commit: str, source_commit: str):
        """
        Get the differences between two commits in CodeCommit.

        Args:
        - repo_name: Name of the repository
        - destination_commit: Commit hash you want to merge into (the "before" hash) (usually on the main or master branch)
        - source_commit: Commit hash of the code you are adding (the "after" branch)

        Returns:
        - List of CodeCommitDifferencesResponse objects

        Raises:
        - ValueError: if the repository does not exist or the differences cannot be retrieved

        Boto3 Documentation:
        - aws codecommit get-differences
        - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/get_differences.html
        """
        if self.boto_client is None:
            self._connect_boto_client()

        # The differences response from AWS is paginated, so we need to iterate through the pages to get all the differences.
        differences = []
        try:
            paginator = self.boto_client.get_paginator("get_differences")
            for page in paginator.paginate(
                repositoryName=repo_name,
                beforeCommitSpecifier=destination_commit,
                afterCommitSpecifier=source_commit,
            ):
                differences.extend(page.get("differences", []))
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == 'RepositoryDoesNotExistException':
                raise ValueError(f"CodeCommit cannot retrieve differences: Repository does not exist: {repo_name}") from e
            raise ValueError(f"CodeCommit cannot retrieve differences for {source_commit}..{destination_commit}") from e
        except Exception as e:
            raise ValueError(f"CodeCommit cannot retrieve differences for {source_commit}..{destination_commit}") from e

        return [CodeCommitDifferencesResponse(difference) for difference in differences]

    def get_file(self, repo_name: str, file_path: str, sha_hash: str, optional: bool = False):
        """
        Retrieve a file from CodeCommit.

        Args:
        - repo_name: Name of the repository
        - file_path: Path to the file you are retrieving
        - sha_hash: Commit hash of the file you are retrieving
        - optional: When True, a file that does not exist yields an empty string instead of an error

        Returns:
        - File contents ("" when file_path is empty)

        Raises:
        - ValueError: if the repository or file cannot be retrieved, or the response carries no file content

        Boto3 Documentation:
        - aws codecommit get_file
        - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/get_file.html
        """
        if not file_path:
            return ""

        if self.boto_client is None:
            self._connect_boto_client()

        try:
            response = self.boto_client.get_file(repositoryName=repo_name, commitSpecifier=sha_hash, filePath=file_path)
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == 'RepositoryDoesNotExistException':
                raise ValueError(f"CodeCommit cannot retrieve PR: Repository does not exist: {repo_name}") from e
            # if the file does not exist, but is flagged as optional, then return an empty string
            if optional and e.response["Error"]["Code"] == 'FileDoesNotExistException':
                return ""
            raise ValueError(f"CodeCommit cannot retrieve file '{file_path}' from repository '{repo_name}'") from e
        except Exception as e:
            raise ValueError(f"CodeCommit cannot retrieve file '{file_path}' from repository '{repo_name}'") from e
        if "fileContent" not in response:
            raise ValueError(f"File content is empty for file: {file_path}")

        # The key is known to be present at this point
        return response["fileContent"]

    def get_pr(self, repo_name: str, pr_number: int):
        """
        Get information about a CodeCommit PR.

        Args:
        - repo_name: Name of the repository
        - pr_number: The PR number you are requesting

        Returns:
        - CodeCommitPullRequestResponse object

        Raises:
        - ValueError: if the PR or repository does not exist, or the response has no "pullRequest" entry

        Boto3 Documentation:
        - aws codecommit get_pull_request
        - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/get_pull_request.html
        """
        if self.boto_client is None:
            self._connect_boto_client()

        try:
            response = self.boto_client.get_pull_request(pullRequestId=str(pr_number))
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == 'PullRequestDoesNotExistException':
                raise ValueError(f"CodeCommit cannot retrieve PR: PR number does not exist: {pr_number}") from e
            if e.response["Error"]["Code"] == 'RepositoryDoesNotExistException':
                raise ValueError(f"CodeCommit cannot retrieve PR: Repository does not exist: {repo_name}") from e
            raise ValueError(f"CodeCommit cannot retrieve PR: {pr_number}: boto client error") from e
        except Exception as e:
            raise ValueError(f"CodeCommit cannot retrieve PR: {pr_number}") from e

        if "pullRequest" not in response:
            # f-string prefix added: the message previously printed the literal text "{pr_number}"
            raise ValueError(f"CodeCommit PR number not found: {pr_number}")

        return CodeCommitPullRequestResponse(response.get("pullRequest", {}))

    def publish_description(self, pr_number: int, pr_title: str, pr_body: str):
        """
        Set the title and description on a pull request

        Args:
        - pr_number: the AWS CodeCommit pull request number
        - pr_title: title of the pull request
        - pr_body: body of the pull request

        Returns:
        - None

        Raises:
        - ValueError: if the PR does not exist or is closed, the title/description is invalid, or the update fails

        Boto3 Documentation:
        - aws codecommit update_pull_request_title
        - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/update_pull_request_title.html
        - aws codecommit update_pull_request_description
        - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/update_pull_request_description.html
        """
        if self.boto_client is None:
            self._connect_boto_client()

        try:
            self.boto_client.update_pull_request_title(pullRequestId=str(pr_number), title=pr_title)
            self.boto_client.update_pull_request_description(pullRequestId=str(pr_number), description=pr_body)
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == 'PullRequestDoesNotExistException':
                raise ValueError(f"PR number does not exist: {pr_number}") from e
            if e.response["Error"]["Code"] == 'InvalidTitleException':
                raise ValueError(f"Invalid title for PR number: {pr_number}") from e
            if e.response["Error"]["Code"] == 'InvalidDescriptionException':
                raise ValueError(f"Invalid description for PR number: {pr_number}") from e
            if e.response["Error"]["Code"] == 'PullRequestAlreadyClosedException':
                raise ValueError(f"PR is already closed: PR number: {pr_number}") from e
            raise ValueError("Boto3 client error calling publish_description") from e
        except Exception as e:
            raise ValueError("Error calling publish_description") from e

    def publish_comment(self, repo_name: str, pr_number: int, destination_commit: str, source_commit: str, comment: str, annotation_file: str = None, annotation_line: int = None):
        """
        Publish a comment to a pull request

        Args:
        - repo_name: name of the repository
        - pr_number: number of the pull request
        - destination_commit: The commit hash you want to merge into (the "before" hash) (usually on the main or master branch)
        - source_commit: The commit hash of the code you are adding (the "after" branch)
        - comment: The comment you want to publish
        - annotation_file: The file you want to annotate (optional)
        - annotation_line: The line number you want to annotate (optional)

        Comment annotations for CodeCommit are different than GitHub.
        CodeCommit only designates the starting line number for the comment.
        It does not support the ending line number to highlight a range of lines.

        Returns:
        - None

        Raises:
        - ValueError: if the repository or PR does not exist, or the comment cannot be posted

        Boto3 Documentation:
        - aws codecommit post_comment_for_pull_request
        - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/post_comment_for_pull_request.html
        """
        if self.boto_client is None:
            self._connect_boto_client()

        # Arguments shared by plain and annotated comments
        request = dict(
            pullRequestId=str(pr_number),
            repositoryName=repo_name,
            beforeCommitId=destination_commit,
            afterCommitId=source_commit,
            content=comment,
        )
        # If the comment has code annotations,
        # then set the file path and line number in the location dictionary
        if annotation_file and annotation_line:
            request["location"] = {
                "filePath": annotation_file,
                "filePosition": annotation_line,
                "relativeFileVersion": "AFTER",
            }

        try:
            self.boto_client.post_comment_for_pull_request(**request)
        except botocore.exceptions.ClientError as e:
            if e.response["Error"]["Code"] == 'RepositoryDoesNotExistException':
                raise ValueError(f"Repository does not exist: {repo_name}") from e
            if e.response["Error"]["Code"] == 'PullRequestDoesNotExistException':
                raise ValueError(f"PR number does not exist: {pr_number}") from e
            raise ValueError("Boto3 client error calling post_comment_for_pull_request") from e
        except Exception as e:
            raise ValueError("Error calling post_comment_for_pull_request") from e


================================================
FILE: pr_agent/git_providers/codecommit_provider.py
================================================
import os
import re
from collections import Counter
from typing import List, Optional, Tuple
from urllib.parse import urlparse

from pr_agent.algo.language_handler import is_valid_file
from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo
from pr_agent.git_providers.codecommit_client import CodeCommitClient

from ..algo.utils import load_large_diff
from ..config_loader import get_settings
from ..log import get_logger
from .git_provider import GitProvider


class PullRequestCCMimic:
    """
    Lightweight stand-in for PyGithub's PullRequest class, used by the CodeCommitProvider.

    Only the title and the diff files are supplied at construction time; the description and
    the commit/branch attributes start as None.
    """

    def __init__(self, title: str, diff_files: List[FilePatchInfo]):
        self.title, self.diff_files = title, diff_files
        self.description = None
        # source side: the branch containing your new code changes
        self.source_commit = None
        self.source_branch = None
        # destination side: the branch you are going to merge into
        self.destination_commit = None
        self.destination_branch = None


class CodeCommitFile:
    """
    A single changed file of a CodeCommit pull request.

    Holds the path and blob id of the "a" and "b" sides together with the edit type.
    `filename` is the "b" path when it is non-empty, otherwise the "a" path.
    """

    def __init__(
        self,
        a_path: str,
        a_blob_id: str,
        b_path: str,
        b_blob_id: str,
        edit_type: EDIT_TYPE,
    ):
        self.a_path, self.a_blob_id = a_path, a_blob_id
        self.b_path, self.b_blob_id = b_path, b_blob_id
        self.edit_type: EDIT_TYPE = edit_type
        self.filename = b_path or a_path


class CodeCommitProvider(GitProvider):
    """
    This class implements the GitProvider interface for AWS CodeCommit repositories.
    """

    def __init__(self, pr_url: Optional[str] = None, incremental: Optional[bool] = False):
        """
        Create the provider and, when a PR URL is supplied, load that pull request.

        Args:
        - pr_url: the full AWS CodeCommit pull request URL (optional)
        - incremental: accepted but not used by this constructor
        """
        self.codecommit_client = CodeCommitClient()
        self.aws_client = None
        # Filled in by set_pr()
        self.repo_name = self.pr_num = self.pr = None
        # Caches filled in by get_diff_files() and get_files()
        self.diff_files = self.git_files = None
        self.pr_url = pr_url
        if not pr_url:
            return
        self.set_pr(pr_url)

    def provider_name(self):
        return "CodeCommit"

    def is_supported(self, capability: str) -> bool:
        if capability in [
            "get_issue_comments",
            "create_inline_comment",
            "publish_inline_comments",
            "get_labels",
            "gfm_markdown"
        ]:
            return False
        return True

    def set_pr(self, pr_url: str):
        self.repo_name, self.pr_num = self._parse_pr_url(pr_url)
        self.pr = self._get_pr()

    def get_files(self) -> list[CodeCommitFile]:
        # bring files from CodeCommit only once
        if self.git_files:
            return self.git_files

        self.git_files = []
        differences = self.codecommit_client.get_differences(self.repo_name, self.pr.destination_commit, self.pr.source_commit)
        for item in differences:
            self.git_files.append(CodeCommitFile(item.before_blob_path,
                                                 item.before_blob_id,
                                                 item.after_blob_path,
                                                 item.after_blob_id,
                                                 CodeCommitProvider._get_edit_type(item.change_type)))
        return self.git_files

    def get_diff_files(self) -> list[FilePatchInfo]:
        """
        Retrieves the list of files that have been modified, added, deleted, or renamed in a pull request in CodeCommit,
        along with their content and patch information.

        Returns:
            diff_files (List[FilePatchInfo]): List of FilePatchInfo objects representing the modified, added, deleted,
            or renamed files in the merge request.
        """
        # bring files from CodeCommit only once
        if self.diff_files:
            return self.diff_files

        self.diff_files = []

        files = self.get_files()
        for diff_item in files:
            patch_filename = ""
            if diff_item.a_blob_id is not None:
                patch_filename = diff_item.a_path
                original_file_content_str = self.codecommit_client.get_file(
                    self.repo_name, diff_item.a_path, self.pr.destination_commit)
                if isinstance(original_file_content_str, (bytes, bytearray)):
                    original_file_content_str = original_file_content_str.decode("utf-8")
            else:
                original_file_content_str = ""

            if diff_item.b_blob_id is not None:
                patch_filename = diff_item.b_path
                new_file_content_str = self.codecommit_client.get_file(self.repo_name, diff_item.b_path, self.pr.source_commit)
                if isinstance(new_file_content_str, (bytes, bytearray)):
                    new_file_content_str = new_file_content_str.decode("utf-8")
            else:
                new_file_content_str = ""

            patch = load_large_diff(patch_filename, new_file_content_str, original_file_content_str)

            # Store the diffs as a list of FilePatchInfo objects
            info = FilePatchInfo(
                original_file_content_str,
                new_file_content_str,
                patch,
                diff_item.b_path,
                edit_type=diff_item.edit_type,
                old_filename=None
                if diff_item.a_path == diff_item.b_path
                else diff_item.a_path,
            )
            # Only add valid files to the diff list
            # "bad extensions" are set in the language_extensions.toml file
            # a "valid file" is one that is not in the "bad extensions" list
            if is_valid_file(info.filename):
                self.diff_files.append(info)

        return self.diff_files

    def publish_description(self, pr_title: str, pr_body: str):
        try:
            self.codecommit_client.publish_description(
                pr_number=self.pr_num,
                pr_title=pr_title,
                pr_body=CodeCommitProvider._add_additional_newlines(pr_body),
            )
        except Exception as e:
            raise ValueError(f"CodeCommit Cannot publish description for PR: {self.pr_num}") from e

    def publish_comment(self, pr_comment: str, is_temporary: bool = False):
        if is_temporary:
            get_logger().info(pr_comment)
            return

        pr_comment = CodeCommitProvider._remove_markdown_html(pr_comment)
        pr_comment = CodeCommitProvider._add_additional_newlines(pr_comment)

        try:
            self.codecommit_client.publish_comment(
                repo_name=self.repo_name,
                pr_number=self.pr_num,
                destination_commit=self.pr.destination_commit,
                source_commit=self.pr.source_commit,
                comment=pr_comment,
            )
        except Exception as e:
            raise ValueError(f"CodeCommit Cannot publish comment for PR: {self.pr_num}") from e

    def publish_code_suggestions(self, code_suggestions: list) -> bool:
        counter = 1
        for suggestion in code_suggestions:
            # Verify that each suggestion has the required keys
            if not all(key in suggestion for key in ["body", "relevant_file", "relevant_lines_start"]):
                get_logger().warning(f"Skipping code suggestion #{counter}: Each suggestion must have 'body', 'relevant_file', 'relevant_lines_start' keys")
                continue

            # Publish the code suggestion to CodeCommit
            try:
                get_logger().debug(f"Code Suggestion #{counter} in file: {suggestion['relevant_file']}: {suggestion['relevant_lines_start']}")
                self.codecommit_client.publish_comment(
                    repo_name=self.repo_name,
                    pr_number=self.pr_num,
                    destination_commit=self.pr.destination_commit,
                    source_commit=self.pr.source_commit,
                    comment=suggestion["body"],
                    annotation_file=suggestion["relevant_file"],
                    annotation_line=suggestion["relevant_lines_start"],
                )
            except Exception as e:
                raise ValueError(f"CodeCommit Cannot publish code suggestions for PR: {self.pr_num}") from e

            counter += 1

        # The calling function passes in a list of code suggestions, and this function publishes each suggestion one at a time.
        # If we were to return False here, the calling function will attempt to publish the same list of code suggestions again, one at a time.
        # Since this function publishes the suggestions one at a time anyway, we always return True here to avoid the retry.
        return True

    def publish_labels(self, labels):
        return [""]  # not implemented yet

    def get_pr_labels(self, update=False):
        return [""]  # not implemented yet

    def remove_initial_comment(self):
        return ""  # not implemented yet

    def remove_comment(self, comment):
        return ""  # not implemented yet

    def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, original_suggestion=None):
        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/post_comment_for_compared_commit.html
        raise NotImplementedError("CodeCommit provider does not support publishing inline comments yet")

    def publish_inline_comments(self, comments: list[dict]):
        raise NotImplementedError("CodeCommit provider does not support publishing inline comments yet")

    def get_title(self):
        return self.pr.title

    def get_pr_id(self):
        """
        Returns the PR ID in the format: "repo_name/pr_number".
        Note: This is an internal identifier for PR-Agent,
        and is not the same as the CodeCommit PR identifier.
        """
        try:
            pr_id = f"{self.repo_name}/{self.pr_num}"
            return pr_id
        except:
            return ""

    def get_languages(self):
        """
        Returns a dictionary of languages, containing the percentage of each language used in the PR.

        Returns:
        - dict: A dictionary where each key is a language name and the corresponding value is the percentage of that language in the PR.
        """
        commit_files = self.get_files()
        filenames = [ item.filename for item in commit_files ]
        extensions = CodeCommitProvider._get_file_extensions(filenames)

        # Calculate the percentage of each file extension in the PR
        percentages = CodeCommitProvider._get_language_percentages(extensions)

        # The global language_extension_map is a dictionary of languages,
        # where each dictionary item is a BoxList of extensions.
        # We want a dictionary of extensions,
        # where each dictionary item is a language name.
        # We build that language->extension dictionary here in main_extensions_flat.
        main_extensions_flat = {}
        language_extension_map_org = get_settings().language_extension_map_org
        language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()}
        for language, extensions in language_extension_map.items():
            for ext in extensions:
                main_extensions_flat[ext] = language

        # Map the file extension/languages to percentages
        languages = {}
        for ext, pct in percentages.items():
            languages[main_extensions_flat.get(ext, "")] = pct

        return languages

    def get_pr_branch(self):
        return self.pr.source_branch

    def get_pr_description_full(self) -> str:
        return self.pr.description

    def get_user_id(self):
        return -1  # not implemented yet

    def get_issue_comments(self):
        raise NotImplementedError("CodeCommit provider does not support issue comments yet")

    def get_repo_settings(self):
        # a local ".pr_agent.toml" settings file is optional
        settings_filename = ".pr_agent.toml"
        return self.codecommit_client.get_file(self.repo_name, settings_filename, self.pr.source_commit, optional=True)

    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:
        get_logger().info("CodeCommit provider does not support eyes reaction yet")
        return True

    def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool:
        get_logger().info("CodeCommit provider does not support removing reactions yet")
        return True

    @staticmethod
    def _parse_pr_url(pr_url: str) -> Tuple[str, int]:
        """
        Parse the CodeCommit PR URL and return the repository name and PR number.

        Args:
        - pr_url: the full AWS CodeCommit pull request URL

        Returns:
        - Tuple[str, int]: A tuple containing the repository name and PR number.
        """
        # Example PR URL:
        # https://us-east-1.console.aws.amazon.com/codesuite/codecommit/repositories/__MY_REPO__/pull-requests/123456"
        parsed_url = urlparse(pr_url)

        if not CodeCommitProvider._is_valid_codecommit_hostname(parsed_url.netloc):
            raise ValueError(f"The provided URL is not a valid CodeCommit URL: {pr_url}")

        path_parts = parsed_url.path.strip("/").split("/")

        if (
            len(path_parts) < 6
            or path_parts[0] != "codesuite"
            or path_parts[1] != "codecommit"
            or path_parts[2] != "repositories"
            or path_parts[4] != "pull-requests"
        ):
            raise ValueError(f"The provided URL does not appear to be a CodeCommit PR URL: {pr_url}")

        repo_name = path_parts[3]

        try:
            pr_number = int(path_parts[5])
        except ValueError as e:
            raise ValueError(f"Unable to convert PR number to integer: '{path_parts[5]}'") from e

        return repo_name, pr_number

    @staticmethod
    def _is_valid_codecommit_hostname(hostname: str) -> bool:
        """
        Check if the provided hostname is a valid AWS CodeCommit hostname.

        This is not an exhaustive check of AWS region names,
        but instead uses a regex to check for matching AWS region patterns.

        Args:
        - hostname: the hostname to check

        Returns:
        - bool: True if the hostname is valid, False otherwise.
        """
        return re.match(r"^[a-z]{2}-(gov-)?[a-z]+-\d\.console\.aws\.amazon\.com$", hostname) is not None

    def _get_pr(self):
        response = self.codecommit_client.get_pr(self.repo_name, self.pr_num)

        if len(response.targets) == 0:
            raise ValueError(f"No files found in CodeCommit PR: {self.pr_num}")

        # TODO: implement support for multiple targets in one CodeCommit PR
        #       for now, we are only using the first target in the PR
        if len(response.targets) > 1:
            get_logger().warning(
                "Multiple targets in one PR is not supported for CodeCommit yet. Continuing, using the first target only..."
            )

        # Return our object that mimics PullRequest class from the PyGithub library
        # (This strategy was copied from the LocalGitProvider)
        mimic = PullRequestCCMimic(response.title, self.diff_files)
        mimic.description = response.description
        mimic.source_commit = response.targets[0].source_commit
        mimic.source_branch = response.targets[0].source_branch
        mimic.destination_commit = response.targets[0].destination_commit
        mimic.destination_branch = response.targets[0].destination_branch

        return mimic

    def get_commit_messages(self):
        return ""  # not implemented yet

    @staticmethod
    def _add_additional_newlines(body: str) -> str:
        """
        Replace single newlines in a PR body with double newlines.

        CodeCommit Markdown does not seem to render as well as GitHub Markdown,
        so we add additional newlines to the PR body to make it more readable in CodeCommit.

        Args:
        - body: the PR body

        Returns:
        - str: the PR body with the double newlines added
        """
        return re.sub(r'(?<!\n)\n(?!\n)', '\n\n', body)

    @staticmethod
    def _remove_markdown_html(comment: str) -> str:
        """
        Remove the HTML tags from a PR comment.

        CodeCommit Markdown does not seem to render as well as GitHub Markdown,
        so we remove the HTML tags from the PR comment to make it more readable in CodeCommit.

        Args:
        - comment: the PR comment

        Returns:
        - str: the PR comment with the HTML tags removed
        """
        comment = comment.replace("<details>", "")
        comment = comment.replace("</details>", "")
        comment = comment.replace("<summary>", "")
        comment = comment.replace("</summary>", "")
        return comment

    @staticmethod
    def _get_edit_type(codecommit_change_type: str):
        """
        Convert the CodeCommit change type string to the EDIT_TYPE enum.
        The CodeCommit change type string is returned from the get_differences SDK method.

        Args:
        - codecommit_change_type: the CodeCommit change type string

        Returns:
        - An EDIT_TYPE enum representing the modified, added, deleted, or renamed file in the PR diff.
        """
        t = codecommit_change_type.upper()
        edit_type = None
        if t == "A":
            edit_type = EDIT_TYPE.ADDED
        elif t == "D":
            edit_type = EDIT_TYPE.DELETED
        elif t == "M":
            edit_type = EDIT_TYPE.MODIFIED
        elif t == "R":
            edit_type = EDIT_TYPE.RENAMED
        return edit_type

    @staticmethod
    def _get_file_extensions(filenames):
        """
        Return a list of file extensions from a list of filenames.
        The returned extensions will include the dot "." prefix,
        to accommodate for the dots in the existing language_extension_map settings.
        Filenames with no extension will return an empty string for the extension.

        Args:
        - filenames: a list of filenames

        Returns:
        - list: A list of file extensions, including the dot "." prefix.
        """
        extensions = []
        for filename in filenames:
            filename, ext = os.path.splitext(filename)
            if ext:
                extensions.append(ext.lower())
            else:
                extensions.append("")
        return extensions

    @staticmethod
    def _get_language_percentages(extensions):
        """
        Return a dictionary containing the programming language name (as the key),
        and the percentage that language is used (as the value),
        given a list of file extensions.

        Args:
        - extensions: a list of file extensions

        Returns:
        - dict: A dictionary where each key is a language name and the corresponding value is the percentage of that language in the PR.
        """
        total_files = len(extensions)
        if total_files == 0:
            return {}

        # Identify language by file extension and count
        lang_count = Counter(extensions)
        # Convert counts to percentages
        lang_percentage = {
            lang: round(count / total_files * 100) for lang, count in lang_count.items()
        }
        return lang_percentage


================================================
FILE: pr_agent/git_providers/gerrit_provider.py
================================================
import json
import os
import pathlib
import shutil
import subprocess
import uuid
from collections import Counter, namedtuple
from pathlib import Path
from tempfile import NamedTemporaryFile, mkdtemp

import requests
import urllib3.util
from git import Repo

from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo
from pr_agent.config_loader import get_settings
from pr_agent.git_providers.git_provider import GitProvider
from pr_agent.git_providers.local_git_provider import PullRequestMimic
from pr_agent.log import get_logger


def _call(*command, **kwargs) -> (int, str, str):
    res = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=True,
        **kwargs,
    )
    return res.stdout.decode()


def clone(url, directory):
    """
    Shallow-clone (depth 1) the repository at `url` into `directory` and log git's output.
    """
    # An f-string is used so that the values always end up in the message; "%s"
    # placeholders are not filled in by a logger that formats with "{}" placeholders.
    get_logger().info(f"Cloning {url} to {directory}")
    stdout = _call('git', 'clone', "--depth", "1", url, directory)
    get_logger().info(stdout)


def fetch(url, refspec, cwd):
    """
    Fetch `refspec` from `url` (depth 2) into the repository at `cwd` and log git's output.
    """
    # An f-string is used so that the values always end up in the message; "%s"
    # placeholders are not filled in by a logger that formats with "{}" placeholders.
    get_logger().info(f"Fetching {url} {refspec}")
    stdout = _call(
        'git', 'fetch', '--depth', '2', url, refspec,
        cwd=cwd
    )
    get_logger().info(stdout)


def checkout(cwd):
    """Check out FETCH_HEAD in the repository at `cwd` and log git's output."""
    logger = get_logger()
    logger.info("Checking out")
    logger.info(_call('git', 'checkout', "FETCH_HEAD", cwd=cwd))


def show(*args, cwd=None):
    """Run "git show" with the given arguments in `cwd` and return its output."""
    get_logger().info("Show")
    command = ('git', 'show') + args
    return _call(*command, cwd=cwd)


def diff(*args, cwd=None):
    """
    Run "git diff" with the given arguments in `cwd`.

    Returns the patch text, or None (after logging a warning) when git reports no changes.
    """
    get_logger().info("Diff")
    patch = _call('git', 'diff', *args, cwd=cwd)
    if patch:
        return patch
    get_logger().warning("No changes found")
    return None


def reset_local_changes(cwd):
    """Discard local modifications in the repository at `cwd` with a forced checkout."""
    get_logger().info("Reset local changes")
    command = ('git', 'checkout', "--force")
    _call(*command, cwd=cwd)


def add_comment(url: urllib3.util.Url, refspec, message):
    """
    Post `message` as a review message through "gerrit review" over SSH.

    The SSH connection uses the port, auth and host of `url`. The target is built from
    the last two "/"-separated parts of `refspec`, joined by a comma.
    Returns the output of the SSH command.
    """
    # NOTE(review): presumably the refspec looks like refs/changes/NN/<change>/<patchset>,
    # so the target below is "<change>,<patchset>" - confirm against the Gerrit documentation.
    *_, second_to_last_part, last_part = refspec.rsplit("/")
    review_target = f"{second_to_last_part},{last_part}"

    # Wrap the message in single quotes; embedded single quotes become '"'"'
    escaped = message.replace("'", "'\"'\"'")
    quoted_message = "'" + escaped + "'"

    ssh_command = [
        "ssh",
        "-p", str(url.port),
        f"{url.auth}@{url.host}",
        "gerrit", "review",
        "--message", quoted_message,
        # "--code-review", score,
        review_target,
    ]
    return _call(*ssh_command)


def list_comments(url: urllib3.util.Url, refspec):
    """
    Query Gerrit over SSH and return the comments of the current patch set.

    The second-to-last "/"-separated part of `refspec` is used as the query term.
    Only the first line of the command output is parsed as JSON; its
    ["currentPatchSet"]["comments"] value is returned.
    """
    *_, query_term, _ = refspec.rsplit("/")
    ssh_command = (
        "ssh",
        "-p", str(url.port),
        f"{url.auth}@{url.host}",
        "gerrit", "query",
        "--comments",
        "--current-patch-set", query_term,
        "--format", "JSON",
    )
    stdout = _call(*ssh_command)
    # The first output line holds the change; any further lines are ignored
    first_line, *_ = stdout.splitlines()
    return json.loads(first_line)["currentPatchSet"]["comments"]


def prepare_repo(url: urllib3.util.Url, project, refspec):
    """
    Clone `project` into a new temporary directory and check out `refspec` there.

    The repository URL is built from the scheme, auth, host and port of `url`.
    The steps are: clone, fetch the refspec, check out what was fetched.

    Returns:
        pathlib.Path: the temporary directory holding the checked-out repository.

    Raises:
        Whatever the clone, fetch or checkout step raises. In that case the
        temporary directory is removed before the error is re-raised.
    """
    repo_url = (f"{url.scheme}://{url.auth}@{url.host}:{url.port}/{project}")

    directory = pathlib.Path(mkdtemp())
    try:
        clone(repo_url, directory)
        fetch(repo_url, refspec, cwd=directory)
        checkout(cwd=directory)
    except BaseException:
        # Do not leave a partially populated temporary clone behind when a git step fails
        shutil.rmtree(directory, ignore_errors=True)
        raise
    return directory


def adopt_to_gerrit_message(message):
    """
    Convert a markdown message into plain text for a Gerrit comment.

    Per line: asterisks and the details/summary HTML tags are removed, double backticks
    become single backticks and surrounding whitespace is stripped. A line starting
    with "#" becomes a heading of the form "<text>:" preceded by an empty line, and a
    line starting with "-" loses that leading dash. The joined result is stripped.
    """
    # Applied in this order to every line
    markdown_replacements = (
        ("*", ""),
        ("``", "`"),
        ("<details>", ""),
        ("</details>", ""),
        ("<summary>", ""),
        ("</summary>", ""),
    )

    buf = []
    for raw_line in message.splitlines():
        line = raw_line
        # remove markdown formatting
        for old, new in markdown_replacements:
            line = line.replace(old, new)
        line = line.strip()

        if line.startswith('#'):
            heading = line.replace('#', '').removesuffix(":").strip()
            buf.append("\n" + heading + ":")
        elif line.startswith('-'):
            buf.append(line.removeprefix('-').strip())
        else:
            buf.append(line)
    return "\n".join(buf).strip()


def add_suggestion(src_filename, context: str, start, end: int):
    """
    Replace lines `start`..`end` (1-based, inclusive) of a file with the suggested text.

    The file is read completely and then rewritten in place. No temporary file is
    involved, so nothing is left behind when the file cannot be read, and the file
    keeps its permission bits.

    Args:
        src_filename: path of the file to modify.
        context: the replacement text; when empty, the lines are removed without replacement.
        start: 1-based number of the first line to replace.
        end: 1-based number of the last line to replace.
    """
    with open(src_filename, "r") as src:
        lines = src.readlines()

    # Clamp at 0: a start below 1 keeps no leading lines, whereas a negative
    # slice bound would silently count from the end of the file.
    kept_before = lines[:max(start - 1, 0)]
    kept_after = lines[end:]

    with open(src_filename, "w") as dst:
        dst.writelines(kept_before)
        if context:
            dst.write(context)
        dst.writelines(kept_after)


def upload_patch(patch, path):
    """
    Upload a patch to the configured patch server and return the URL it was stored under.

    The endpoint and bearer token are read from the 'gerrit.patch_server_endpoint' and
    'gerrit.patch_server_token' settings. The patch is sent as JSON together with `path`.

    Returns:
        The endpoint (without trailing slashes) joined with `path` by a "/".

    Raises:
        Whatever response.raise_for_status() raises for an unsuccessful response.
    """
    patch_server_endpoint = get_settings().get(
        'gerrit.patch_server_endpoint')
    patch_server_token = get_settings().get(
        'gerrit.patch_server_token')

    payload = {
        "content": patch,
        "path": path,
    }
    request_headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {patch_server_token}",
    }
    response = requests.post(patch_server_endpoint, json=payload, headers=request_headers)
    response.raise_for_status()

    base_url = patch_server_endpoint.rstrip("/")
    return base_url + "/" + path


class GerritProvider(GitProvider):

    def __init__(self, key: str, incremental=False):
        """
        Set up the provider for one Gerrit change and clone its repository.

        `key` has the form "<project>:<refspec>". The Gerrit URL and user are read from the
        'gerrit.url' and 'gerrit.user' settings. The repository is cloned into a temporary
        directory and the mimicked pull request is built from it. `incremental` is not
        used by this constructor.
        """
        project, refspec = key.split(':')
        self.project, self.refspec = project, refspec
        assert self.project, "Project name is required"
        assert self.refspec, "Refspec is required"

        base_url = get_settings().get('gerrit.url')
        assert base_url, "Gerrit URL is required"
        user = get_settings().get('gerrit.user')
        assert user, "Gerrit user is required"

        # Rebuild the configured URL with the Gerrit user as its auth part
        parsed = urllib3.util.parse_url(base_url)
        url_with_user = f"{parsed.scheme}://{user}@{parsed.host}:{parsed.port}"
        self.parsed_url = urllib3.util.parse_url(url_with_user)

        self.repo_path = prepare_repo(self.parsed_url, self.project, self.refspec)
        self.repo = Repo(self.repo_path)
        assert self.repo
        self.pr_url = base_url
        self.pr = PullRequestMimic(self.get_pr_title(), self.get_diff_files())

    def get_pr_title(self):
        """
        Substitutes the branch-name as the PR-mimic title.
        """
        return self.repo.branches[0].name

    def get_issue_comments(self):
        comments = list_comments(self.parsed_url, self.refspec)
        Comments = namedtuple('Comments', ['reversed'])
        Comment = namedtuple('Comment', ['body'])
        return Comments([Comment(c['message']) for c in reversed(comments)])

    def get_pr_labels(self, update=False):
        raise NotImplementedError(
            'Getting labels is not implemented for the gerrit provider')

    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False):
        raise NotImplementedError(
            'Adding reactions is not implemented for the gerrit provider')

    def remove_reaction(self, issue_comment_id: int, reaction_id: int):
        raise NotImplementedError(
            'Removing reactions is not implemented for the gerrit provider')

    def get_commit_messages(self):
        return [self.repo.head.commit.message]

    def get_repo_settings(self):
        try:
            with open(self.repo_path / ".pr_agent.toml", 'rb') as f:
                contents = f.read()
            return contents
        except OSError:
            return b""

    def get_diff_files(self) -> list[FilePatchInfo]:
        diffs = self.repo.head.commit.diff(
            self.repo.head.commit.parents[0],  # previous commit
            create_patch=True,
            R=True
        )

        diff_files = []
        for diff_item in diffs:
            if diff_item.a_blob is not None:
                original_file_content_str = (
                    diff_item.a_blob.data_stream.read().decode('utf-8')
                )
            else:
                original_file_content_str = ""  # empty file
            if diff_item.b_blob is not None:
                new_file_content_str = diff_item.b_blob.data_stream.read(). \
                    decode('utf-8')
            else:
                new_file_content_str = ""  # empty file
            edit_type = EDIT_TYPE.MODIFIED
            if diff_item.new_file:
                edit_type = EDIT_TYPE.ADDED
            elif diff_item.deleted_file:
                edit_type = EDIT_TYPE.DELETED
            elif diff_item.renamed_file:
                edit_type = EDIT_TYPE.RENAMED
            diff_files.append(
                FilePatchInfo(
                    original_file_content_str,
                    new_file_content_str,
                    diff_item.diff.decode('utf-8'),
                    diff_item.b_path,
                    edit_type=edit_type,
                    old_filename=None
                    if diff_item.a_path == diff_item.b_path
                    else diff_item.a_path
                )
            )
        self.diff_files = diff_files
        return diff_files

    def get_files(self):
        diff_index = self.repo.head.commit.diff(
            self.repo.head.commit.parents[0],  # previous commit
            R=True
        )
        # Get the list of changed files
        diff_files = [item.a_path for item in diff_index]
        return diff_files

    def get_languages(self):
        """
        Calculate percentage of languages in repository. Used for hunk
        prioritisation.
        """
        # Get all files in repository
        filepaths = [Path(item.path) for item in
                     self.repo.tree().traverse() if item.type == 'blob']
        # Identify language by file extension and count
        lang_count = Counter(
            ext.lstrip('.') for filepath in filepaths for ext in
            [filepath.suffix.lower()])
        # Convert counts to percentages
        total_files = len(filepaths)
        lang_percentage = {lang: count / total_files * 100 for lang, count
                           in lang_count.items()}
        return lang_percentage

    def get_pr_description_full(self):
        return self.repo.head.commit.message

    def get_user_id(self):
        return self.repo.head.commit.author.email

    def is_supported(self, capability: str) -> bool:
        if capability in [
            # 'get_issue_comments',
            'create_inline_comment',
            'publish_inline_comments',
            'get_labels',
            'gfm_markdown'
        ]:
            return False
        return True

    def split_suggestion(self, msg) -> tuple[str, str]:
        is_code_context = False
        description = []
        context = []
        for line in msg.splitlines():
            if line.startswith('```suggestion'):
                is_code_context = True
                continue
            if line.startswith('```'):
                is_code_context = False
                continue
            if is_code_context:
                context.append(line)
            else:
                description.append(
                    line.replace('*', '')
                )

        return (
            '\n'.join(description),
            '\n'.join(context) + '\n' if context else ''
        )

    def publish_code_suggestions(self, code_suggestions: list):
        msg = []
        for suggestion in code_suggestions:
            description, code = self.split_suggestion(suggestion['body'])
            add_suggestion(
                pathlib.Path(self.repo_path) / suggestion["relevant_file"],
                code,
                suggestion["relevant_lines_start"],
                suggestion["relevant_lines_end"],
            )
            patch = diff(cwd=self.repo_path)
            patch_id = uuid.uuid4().hex[0:4]
            path = "/".join(["codium-ai", self.refspec, patch_id])
            full_path = upload_patch(patch, path)
            reset_local_changes(self.repo_path)
            msg.append(f'* {description}\n{full_path}')

        if msg:
            add_comment(self.parsed_url, self.refspec, "\n".join(msg))
            return True

    def publish_comment(self, pr_comment: str, is_temporary: bool = False):
        if not is_temporary:
            msg = adopt_to_gerrit_message(pr_comment)
            add_comment(self.parsed_url, self.refspec, msg)

    def publish_description(self, pr_title: str, pr_body: str):
        msg = adopt_to_gerrit_message(pr_body)
        add_comment(self.parsed_url, self.refspec, pr_title + '\n' + msg)

    def publish_inline_comments(self, comments: list[dict]):
        raise NotImplementedError(
            'Publishing inline comments is not implemented for the gerrit '
            'provider')

    def publish_inline_comment(self, body: str, relevant_file: str,
                               relevant_line_in_file: str, original_suggestion=None):
        raise NotImplementedError(
            'Publishing inline comments is not implemented for the gerrit '
            'provider')


    def publish_labels(self, labels):
        # Not applicable to the local git provider,
        # but required by the interface
        pass

    def remove_initial_comment(self):
        # remove repo, cloned in previous steps
        # shutil.rmtree(self.repo_path)
        pass

    def remove_comment(self, comment):
        pass

    def get_pr_branch(self):
        return self.repo.head


================================================
FILE: pr_agent/git_providers/git_provider.py
================================================
from abc import ABC, abstractmethod
# enum EDIT_TYPE (ADDED, DELETED, MODIFIED, RENAMED)
import os
import shutil
import subprocess
from typing import Optional, Tuple

from pr_agent.algo.types import FilePatchInfo
from pr_agent.algo.utils import Range, process_description
from pr_agent.config_loader import get_settings
from pr_agent.log import get_logger

MAX_FILES_ALLOWED_FULL = 50

def get_git_ssl_env() -> dict[str, str]:
    """
    Get git SSL configuration for per-command use.
    This fixes SSL certificate issues when cloning repos with self-signed certificates.

    Candidate CA bundles are read from SSL_CERT_FILE, REQUESTS_CA_BUNDLE and GIT_SSL_CAINFO, in that
    priority order. The first candidate that is both defined and present on disk is chosen. A candidate
    that is defined but missing on disk is reported and the next candidate is tried.

    Returns:
        A copy of the current environment. If a bundle was chosen, GIT_SSL_CAINFO and
        REQUESTS_CA_BUNDLE in the copy both point at it; otherwise the copy is unchanged.
    """
    ssl_cert_file = os.environ.get('SSL_CERT_FILE')
    requests_ca_bundle = os.environ.get('REQUESTS_CA_BUNDLE')
    git_ssl_ca_info = os.environ.get('GIT_SSL_CAINFO')

    chosen_cert_file = ""

    # Try SSL_CERT_FILE first
    if ssl_cert_file:
        if os.path.exists(ssl_cert_file):
            if ((requests_ca_bundle and requests_ca_bundle != ssl_cert_file)
                    or (git_ssl_ca_info and git_ssl_ca_info != ssl_cert_file)):
                get_logger().warning("Found mismatch among: SSL_CERT_FILE, REQUESTS_CA_BUNDLE, GIT_SSL_CAINFO. "
                                     "Using the SSL_CERT_FILE to resolve ambiguity.",
                                     artifact={"ssl_cert_file": ssl_cert_file,
                                               "requests_ca_bundle": requests_ca_bundle,
                                               'git_ssl_ca_info': git_ssl_ca_info})
            else:
                get_logger().info("Using SSL certificate bundle for git operations",
                                  artifact={"ssl_cert_file": ssl_cert_file})
            chosen_cert_file = ssl_cert_file
        else:
            get_logger().warning("SSL certificate bundle not found for git operations",
                                 artifact={"ssl_cert_file": ssl_cert_file})

    # Fallback to REQUESTS_CA_BUNDLE (also reached when SSL_CERT_FILE is defined but missing on disk)
    if not chosen_cert_file and requests_ca_bundle:
        if os.path.exists(requests_ca_bundle):
            if git_ssl_ca_info and git_ssl_ca_info != requests_ca_bundle:
                get_logger().warning("Found mismatch between: REQUESTS_CA_BUNDLE, GIT_SSL_CAINFO. "
                                     "Using the REQUESTS_CA_BUNDLE to resolve ambiguity.",
                                     artifact={"requests_ca_bundle": requests_ca_bundle,
                                               'git_ssl_ca_info': git_ssl_ca_info})
            else:
                get_logger().info("Using SSL certificate bundle from REQUESTS_CA_BUNDLE for git operations",
                                  artifact={"requests_ca_bundle": requests_ca_bundle})
            chosen_cert_file = requests_ca_bundle
        else:
            get_logger().warning("requests CA bundle not found for git operations",
                                 artifact={"requests_ca_bundle": requests_ca_bundle})

    # Fallback to GIT CA (also reached when the higher-priority bundles are defined but missing on disk)
    if not chosen_cert_file and git_ssl_ca_info:
        if os.path.exists(git_ssl_ca_info):
            get_logger().info("Using git SSL CA info from GIT_SSL_CAINFO for git operations",
                              artifact={"git_ssl_ca_info": git_ssl_ca_info})
            chosen_cert_file = git_ssl_ca_info
        else:
            get_logger().warning("git SSL CA info not found for git operations",
                                 artifact={"git_ssl_ca_info": git_ssl_ca_info})

    if not chosen_cert_file:
        get_logger().warning("Neither SSL_CERT_FILE nor REQUESTS_CA_BUNDLE nor GIT_SSL_CAINFO are defined, or they are defined but not found. Returning environment without SSL configuration")

    returned_env = os.environ.copy()
    if chosen_cert_file:
        returned_env.update({"GIT_SSL_CAINFO": chosen_cert_file, "REQUESTS_CA_BUNDLE": chosen_cert_file})
    return returned_env


class GitProvider(ABC):
    """
    Base class for git-hosting providers.

    Methods marked ``@abstractmethod`` must be implemented by every provider. The remaining methods
    carry a default implementation (often a no-op, or an empty/neutral return value) that a provider
    may override.
    """

    @abstractmethod
    def is_supported(self, capability: str) -> bool:
        """Return whether this provider supports the named capability."""
        pass

    def get_git_repo_url(self, issues_or_pr_url: str) -> str:
        """
        Given a url (issues or PR/MR), return the .git repo url to which it belongs.

        Needs to be implemented by the provider; this default logs a warning and returns "".
        """
        get_logger().warning("Not implemented! Returning empty url")
        return ""

    def get_canonical_url_parts(self, repo_git_url:str, desired_branch:str) -> Tuple[str, str]:
        """
        Given a git repo url, return the (prefix, suffix) needed to view a file belonging to that repo.

        Needs to be implemented by the provider; this default logs a warning and returns ("", "").

        Example: for https://git_provider.com/MY_PROJECT/MY_REPO.git and desired branch <MY_BRANCH>, a
        provider would return
        ('https://git_provider.com/projects/MY_PROJECT/repos/MY_REPO/.../<MY_BRANCH>', '?=<SOME HEADER>')
        so that docs/readme.md is viewed at <PREFIX>/docs/readme.md<SUFFIX>.
        """
        get_logger().warning("Not implemented! Returning empty prefix and suffix")
        return ("", "")


    #Clone related API
    class ScopedClonedRepo(object):
        """
        Wrapper that deletes a cloned repo folder once the wrapper is finalized.

        Example usage:
            with TemporaryDirectory() as tmp_dir:
                returned_obj: GitProvider.ScopedClonedRepo = self.git_provider.clone(self.repo_url, tmp_dir, remove_dest_folder=False)
                print(returned_obj.path) #Use returned_obj.path.
            #From this point, returned_obj.path may be deleted at any point and therefore must not be used.
        """
        def __init__(self, dest_folder):
            self.path = dest_folder

        def __del__(self):
            # Best-effort cleanup: errors during removal are ignored.
            if self.path and os.path.exists(self.path):
                shutil.rmtree(self.path, ignore_errors=True)

    def _prepare_clone_url_with_token(self, repo_url_to_clone: str) -> str | None:
        """
        Hook that lets implementors manipulate the repo url to clone (such as embedding tokens in it).

        Needs to be implemented by the provider; this default logs a warning and returns None.
        """
        get_logger().warning("Not implemented! Returning None")
        return None

    def _clone_inner(self, repo_url: str, dest_folder: str, operation_timeout_in_seconds: Optional[int] = None) -> None:
        """
        Shallow-clone ``repo_url`` into ``dest_folder`` by running ``git clone`` in a subprocess.

        Roughly equivalent to ``Repo.clone_from(repo_url, dest_folder)``, but with a timeout guard.
        Note: this can only be used in a context that supports using pipes.

        Raises:
            subprocess.CalledProcessError: git exited with a non-zero status (``check=True``).
            subprocess.TimeoutExpired: the clone did not finish within the given timeout.
        """
        try:
            ssl_env = get_git_ssl_env()
        except Exception as e:
            get_logger().exception(
                "Failed to prepare SSL environment for git operations, falling back to default env",
                artifact={"error": e}
            )
            ssl_env = os.environ.copy()

        subprocess.run([
            "git", "clone",
            "--filter=blob:none",
            "--depth", "1",
            repo_url, dest_folder
        ], env=ssl_env, check=True,  # check=True will raise an exception if the command fails
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=operation_timeout_in_seconds)

    CLONE_TIMEOUT_SEC = 20

    def clone(self, repo_url_to_clone: str, dest_folder: str, remove_dest_folder: bool = True,
              operation_timeout_in_seconds: int=CLONE_TIMEOUT_SEC) -> ScopedClonedRepo|None:
        """
        Clone a given url to a destination folder.

        Args:
            repo_url_to_clone: url of the repo; passed through ``_prepare_clone_url_with_token`` first.
            dest_folder: folder to clone into.
            remove_dest_folder: when True, an already existing ``dest_folder`` directory is removed first.
            operation_timeout_in_seconds: timeout forwarded to ``_clone_inner``.

        Returns:
            A ``GitProvider.ScopedClonedRepo`` wrapping ``dest_folder`` on success (it deletes the folder
            once it is finalized), or None when no clone url could be obtained or the clone failed.
        """
        returned_obj = None
        clone_url = self._prepare_clone_url_with_token(repo_url_to_clone)
        if not clone_url:
            get_logger().error("Clone failed: Unable to obtain url to clone.")
            return returned_obj
        try:
            if remove_dest_folder and os.path.exists(dest_folder) and os.path.isdir(dest_folder):
                shutil.rmtree(dest_folder)
            self._clone_inner(clone_url, dest_folder, operation_timeout_in_seconds)
            returned_obj = GitProvider.ScopedClonedRepo(dest_folder)
        except Exception as e:
            # clone_url may embed a token, so report the caller-supplied url and scrub the error text.
            # NOTE(review): the traceback emitted by exception() may still contain the raw command line.
            get_logger().exception("Clone failed: Could not clone url.",
                artifact={"error": str(e).replace(clone_url, repo_url_to_clone),
                          "url": repo_url_to_clone, "dest_folder": dest_folder})
        # Deliberately not returned from a `finally` clause: that would also swallow
        # KeyboardInterrupt / SystemExit raised while cloning.
        return returned_obj

    @abstractmethod
    def get_files(self) -> list:
        """Provider-specific; must be overridden."""
        pass

    @abstractmethod
    def get_diff_files(self) -> list[FilePatchInfo]:
        """Provider-specific; must be overridden. Annotated to return a list of FilePatchInfo."""
        pass

    def get_incremental_commits(self, is_incremental):
        """Default: no-op returning None."""
        pass

    @abstractmethod
    def publish_description(self, pr_title: str, pr_body: str):
        """Provider-specific; must be overridden."""
        pass

    @abstractmethod
    def publish_code_suggestions(self, code_suggestions: list) -> bool:
        """Provider-specific; must be overridden."""
        pass

    @abstractmethod
    def get_languages(self):
        """Provider-specific; must be overridden."""
        pass

    @abstractmethod
    def get_pr_branch(self):
        """Provider-specific; must be overridden."""
        pass

    @abstractmethod
    def get_user_id(self):
        """Provider-specific; must be overridden."""
        pass

    @abstractmethod
    def get_pr_description_full(self) -> str:
        """Provider-specific; must be overridden. Used as the raw description by the helpers below."""
        pass

    def edit_comment(self, comment, body: str):
        """Default: no-op returning None."""
        pass

    def edit_comment_from_comment_id(self, comment_id: int, body: str):
        """Default: no-op returning None."""
        pass

    def get_comment_body_from_comment_id(self, comment_id: int) -> str:
        """Default: no-op returning None (despite the ``str`` annotation)."""
        pass

    def reply_to_comment_from_comment_id(self, comment_id: int, body: str):
        """Default: no-op returning None."""
        pass

    def get_pr_description(self, full: bool = True, split_changes_walkthrough=False) -> str | tuple:
        """
        Return the PR description, clipped to CONFIG.MAX_DESCRIPTION_TOKENS when that setting is truthy.

        Args:
            full: use ``get_pr_description_full()`` when True, otherwise ``get_user_description()``.
            split_changes_walkthrough: when True, the description is passed through
                ``process_description`` and a ``(description, files)`` tuple is returned.

        Returns:
            The description string, or ``(description, files)`` when ``split_changes_walkthrough`` is set.
        """
        from pr_agent.algo.utils import clip_tokens
        from pr_agent.config_loader import get_settings
        max_tokens_description = get_settings().get("CONFIG.MAX_DESCRIPTION_TOKENS", None)
        description = self.get_pr_description_full() if full else self.get_user_description()
        if split_changes_walkthrough:
            description, files = process_description(description)
            if max_tokens_description:
                description = clip_tokens(description, max_tokens_description)
            return description, files
        if max_tokens_description:
            description = clip_tokens(description, max_tokens_description)
        return description

    def get_user_description(self) -> str:
        """
        Return the user-written part of the PR description.

        A previously computed value cached on ``self.user_description`` is returned as-is. Otherwise the
        full description is inspected: if it does not start with one of the pr-agent headers it is
        returned unchanged; if it does, the text following the "user description" header is extracted
        (or "" when that header is absent).
        """
        if hasattr(self, 'user_description') and not (self.user_description is None):
            return self.user_description

        description = (self.get_pr_description_full() or "").strip()
        description_lowercase = description.lower()
        get_logger().debug("Existing description", description=description_lowercase)

        # if the existing description wasn't generated by the pr-agent, just return it as-is
        if not self._is_generated_by_pr_agent(description_lowercase):
            get_logger().info("Existing description was not generated by the pr-agent")
            self.user_description = description
            return description

        # if the existing description was generated by the pr-agent, but it doesn't contain a user description,
        # return nothing (empty string) because it means there is no user description
        user_description_header = "### **user description**"
        if user_description_header not in description_lowercase:
            get_logger().info("Existing description was generated by the pr-agent, but it doesn't contain a user description")
            return ""

        # the 'user description' is in the beginning. extract and return it
        possible_headers = self._possible_headers()
        start_position = description_lowercase.find(user_description_header) + len(user_description_header)
        end_position = len(description)
        for header in possible_headers: # try to clip at the next header
            if header != user_description_header and header in description_lowercase:
                end_position = min(end_position, description_lowercase.find(header))
        if end_position != len(description) and end_position > start_position:
            original_user_description = description[start_position:end_position].strip()
            if original_user_description.endswith("___"):
                original_user_description = original_user_description[:-3].strip()
        else:
            # no later header to clip at: take everything before the first "___" separator
            original_user_description = description.split("___")[0].strip()
            if original_user_description.lower().startswith(user_description_header):
                original_user_description = original_user_description[len(user_description_header):].strip()

        get_logger().info("Extracted user description from existing description",
                          description=original_user_description)
        self.user_description = original_user_description
        return original_user_description

    def _possible_headers(self):
        """Return the lowercase headers that mark a description as generated by pr-agent."""
        return ("### **user description**", "### **pr type**", "### **pr description**", "### **pr labels**", "### **type**", "### **description**",
                "### **labels**", "### 🤖 generated by pr agent")

    def _is_generated_by_pr_agent(self, description_lowercase: str) -> bool:
        """Return True when the (already lowercased) description starts with a pr-agent header."""
        possible_headers = self._possible_headers()
        return any(description_lowercase.startswith(header) for header in possible_headers)

    @abstractmethod
    def get_repo_settings(self):
        """Provider-specific; must be overridden."""
        pass

    def get_workspace_name(self):
        """Default: returns ""."""
        return ""

    def get_pr_id(self):
        """Default: returns ""."""
        return ""

    def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str:
        """Default: returns ""."""
        return ""

    def get_lines_link_original_file(self, filepath:str, component_range: Range) -> str:
        """Default: returns ""."""
        return ""

    #### comments operations ####
    @abstractmethod
    def publish_comment(self, pr_comment: str, is_temporary: bool = False):
        """Provider-specific; must be overridden."""
        pass

    def publish_persistent_comment(self, pr_comment: str,
                                   initial_header: str,
                                   update_header: bool = True,
                                   name='review',
                                   final_update_message=True):
        """Default: ignores the persistence arguments and simply publishes ``pr_comment``."""
        return self.publish_comment(pr_comment)

    def publish_persistent_comment_full(self, pr_comment: str,
                                   initial_header: str,
                                   update_header: bool = True,
                                   name='review',
                                   final_update_message=True):
        """
        Update the existing comment that starts with ``initial_header``, or publish a new one.

        The first comment from ``get_issue_comments()`` whose body starts with ``initial_header`` is
        edited in place. When ``update_header`` is set, the header inside ``pr_comment`` is extended
        with an "updated until commit" line. When ``final_update_message`` is set, a short notice
        comment is published and its result returned; otherwise the edited comment is returned.
        If no such comment exists, or anything above raises, ``pr_comment`` is published as a new comment.
        """
        try:
            prev_comments = list(self.get_issue_comments())
            for comment in prev_comments:
                if comment.body.startswith(initial_header):
                    latest_commit_url = self.get_latest_commit_url()
                    comment_url = self.get_comment_url(comment)
                    if update_header:
                        updated_header = f"{initial_header}\n\n#### ({name.capitalize()} updated until commit {latest_commit_url})\n"
                        pr_comment_updated = pr_comment.replace(initial_header, updated_header)
                    else:
                        pr_comment_updated = pr_comment
                    get_logger().info(f"Persistent mode - updating comment {comment_url} to latest {name} message")
                    self.edit_comment(comment, pr_comment_updated)
                    if final_update_message:
                        return self.publish_comment(
                            f"**[Persistent {name}]({comment_url})** updated to latest commit {latest_commit_url}")
                    return comment
        except Exception as e:
            # Best-effort: fall through to publishing a fresh comment.
            get_logger().exception(f"Failed to update persistent review, error: {e}")
        return self.publish_comment(pr_comment)

    @abstractmethod
    def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, original_suggestion=None):
        """Provider-specific; must be overridden."""
        pass

    def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str,
                              absolute_position: int = None):
        """Default: always raises ``NotImplementedError``."""
        raise NotImplementedError("This git provider does not support creating inline comments yet")

    @abstractmethod
    def publish_inline_comments(self, comments: list[dict]):
        """Provider-specific; must be overridden."""
        pass

    @abstractmethod
    def remove_initial_comment(self):
        """Provider-specific; must be overridden."""
        pass

    @abstractmethod
    def remove_comment(self, comment):
        """Provider-specific; must be overridden."""
        pass

    @abstractmethod
    def get_issue_comments(self):
        """Provider-specific; must be overridden. Iterated by ``publish_persistent_comment_full``."""
        pass

    def get_comment_url(self, comment) -> str:
        """Default: returns ""."""
        return ""

    def get_review_thread_comments(self, comment_id: int) -> list[dict]:
        """Default: no-op returning None (despite the ``list[dict]`` annotation)."""
        pass

    #### labels operations ####
    @abstractmethod
    def publish_labels(self, labels):
        """Provider-specific; must be overridden."""
        pass

    @abstractmethod
    def get_pr_labels(self, update=False):
        """Provider-specific; must be overridden."""
        pass

    def get_repo_labels(self):
        """Default: no-op returning None."""
        pass

    @abstractmethod
    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:
        """Provider-specific; must be overridden."""
        pass

    @abstractmethod
    def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool:
        """Provider-specific; must be overridden."""
        pass

    #### commits operations ####
    @abstractmethod
    def get_commit_messages(self):
        """Provider-specific; must be overridden."""
        pass

    def get_pr_url(self) -> str:
        """Return ``self.pr_url`` when the attribute exists, otherwise ""."""
        if hasattr(self, 'pr_url'):
            return self.pr_url
        return ""

    def get_latest_commit_url(self) -> str:
        """Default: returns ""."""
        return ""

    def auto_approve(self) -> bool:
        """Default: returns False."""
        return False

    def calc_pr_statistics(self, pull_request_data: dict):
        """Default: returns an empty dict."""
        return {}

    def get_num_of_files(self):
        """Return ``len(self.get_diff_files())``, or -1 if obtaining the diff files raises."""
        try:
            return len(self.get_diff_files())
        except Exception:
            return -1

    def limit_output_characters(self, output: str, max_chars: int):
        """Return ``output`` unchanged if it fits in ``max_chars``; otherwise its first ``max_chars`` characters plus '...'."""
        return output[:max_chars] + '...' if len(output) > max_chars else output


def get_main_pr_language(languages, files) -> str:
    """
    Get the main language of the commit. Return an empty string if cannot determine.

    Args:
        languages: mapping of language name to a comparable weight; the key with the largest value is
            treated as the top language.
        files: iterable of file names (``str``) or objects exposing a ``filename`` attribute.
            Falsy entries are skipped.

    Returns:
        The lowercased top language when the most common file extension belongs to it according to
        ``get_settings().language_extension_map_org``; otherwise the first language in that map that
        lists the extension; otherwise "".
    """
    from collections import Counter

    main_language_str = ""
    if not languages:
        get_logger().info("No languages detected")
        return main_language_str
    if not files:
        get_logger().info("No files in diff")
        return main_language_str

    try:
        top_language = max(languages, key=languages.get).lower()

        # validate that the specific commit uses the main language
        extension_counts = Counter()
        for file in files:
            if not file:
                continue
            filename = file if isinstance(file, str) else file.filename
            extension_counts[filename.rsplit('.')[-1]] += 1

        if not extension_counts:
            # every entry was falsy, so there is nothing to infer a language from
            get_logger().info("No files in diff")
            return main_language_str

        # get the most common extension; on ties Counter keeps first-encountered order,
        # which makes the result deterministic across runs
        most_common_extension = '.' + extension_counts.most_common(1)[0][0]
        try:
            language_extension_map_org = get_settings().language_extension_map_org
            language_extension_map = {k.lower(): v for k, v in language_extension_map_org.items()}

            if top_language in language_extension_map and most_common_extension in language_extension_map[top_language]:
                main_language_str = top_language
            else:
                for language, extensions in language_extension_map.items():
                    if most_common_extension in extensions:
                        main_language_str = language
                        break
        except Exception as e:
            get_logger().exception(f"Failed to get main language: {e}")

    except Exception as e:
        get_logger().exception(e)

    return main_language_str


class IncrementalPR:
    """
    Plain state holder describing an incremental PR run.

    Attributes set by the constructor: ``is_incremental`` (as given), and ``commits_range``,
    ``first_new_commit`` and ``last_seen_commit`` (all initially None).
    """

    def __init__(self, is_incremental: bool = False):
        self.is_incremental = is_incremental
        self.commits_range = None
        self.first_new_commit = None
        self.last_seen_commit = None

    @property
    def first_new_commit_sha(self):
        """The ``sha`` of ``first_new_commit``, or None while no such commit is set."""
        commit = self.first_new_commit
        if commit is None:
            return None
        return commit.sha

    @property
    def last_seen_commit_sha(self):
        """The ``sha`` of ``last_seen_commit``, or None while no such commit is set."""
        commit = self.last_seen_commit
        if commit is None:
            return None
        return commit.sha


================================================
FILE: pr_agent/git_providers/gitea_provider.py
================================================
import json
from typing import Any, Dict, List, Optional, Set, Tuple
from urllib.parse import urlparse

import giteapy
from giteapy.rest import ApiException

from pr_agent.algo.file_filter import filter_ignored
from pr_agent.algo.language_handler import is_valid_file
from pr_agent.algo.types import EDIT_TYPE
from pr_agent.algo.utils import (clip_tokens,
                                 find_line_number_of_relevant_line_in_file)
from pr_agent.config_loader import get_settings
from pr_agent.git_providers.git_provider import (MAX_FILES_ALLOWED_FULL,
                                                 FilePatchInfo, GitProvider,
                                                 IncrementalPR)
from pr_agent.log import get_logger


class GiteaProvider(GitProvider):
    def __init__(self, url: Optional[str] = None):
        """
        Set up the Gitea API client and load PR or issue state for ``url``.

        Args:
            url: a URL containing "pulls" (PR mode) or "issues" (issue mode). Any other URL leaves
                the provider with neither mode enabled.

        Raises:
            ValueError: when ``url`` is empty or GITEA.PERSONAL_ACCESS_TOKEN is not configured.
        """
        super().__init__()
        self.logger = get_logger()

        if not url:
            self.logger.error("PR URL not provided.")
            raise ValueError("PR URL not provided.")

        self.base_url = get_settings().get("GITEA.URL", "https://gitea.com").rstrip("/")
        self.pr_url = ""
        self.issue_url = ""

        self.gitea_access_token = get_settings().get("GITEA.PERSONAL_ACCESS_TOKEN", None)
        if not self.gitea_access_token:
            self.logger.error("Gitea access token not found in settings.")
            raise ValueError("Gitea access token not found in settings.")

        self.repo_settings = get_settings().get("GITEA.REPO_SETTING", None)
        configuration = giteapy.Configuration()
        configuration.host = "{}/api/v1".format(self.base_url)
        configuration.api_key['Authorization'] = f'token {self.gitea_access_token}'

        if get_settings().get("GITEA.SKIP_SSL_VERIFICATION", False):
            configuration.verify_ssl = False

        # Use custom cert (self-signed)
        configuration.ssl_ca_cert = get_settings().get("GITEA.SSL_CA_CERT", None)

        client = giteapy.ApiClient(configuration)
        self.repo_api = RepoApi(client)
        self.owner = None
        self.repo = None
        self.pr_number = None
        self.issue_number = None
        self.max_comment_chars = 65000
        self.enabled_pr = False
        self.enabled_issue = False
        self.temp_comments = []
        self.pr = None
        self.git_files = []
        self.file_contents = {}
        self.file_diffs = {}
        self.sha = None
        self.diff_files = []
        self.incremental = IncrementalPR(False)
        self.comments_list = []
        self.unreviewed_files_set = dict()

        # Commit-related attributes always exist, so issue mode (or an unrecognized URL)
        # does not hit AttributeError when they are read later.
        self.pr_commits = None
        self.last_commit = None
        self.last_commit_id = None
        self.base_sha = ""
        self.base_ref = ""

        if "pulls" in url:
            self.pr_url = url
            self.__set_repo_and_owner_from_pr()
            self.enabled_pr = True
            self.pr = self.repo_api.get_pull_request(
                owner=self.owner,
                repo=self.repo,
                pr_number=self.pr_number
            )
            self.git_files = self.repo_api.get_change_file_pull_request(
                owner=self.owner,
                repo=self.repo,
                pr_number=self.pr_number
            )
            # Optional ignore with user custom
            self.git_files = filter_ignored(self.git_files, platform="gitea")

            self.sha = self.pr.head.sha if self.pr.head.sha else ""
            self.__add_file_content()
            self.__add_file_diff()
            self.pr_commits = self.repo_api.list_all_commits(
                owner=self.owner,
                repo=self.repo
            )
            # An empty commit list leaves last_commit as None instead of raising IndexError.
            self.last_commit = self.pr_commits[-1] if self.pr_commits else None
            self.last_commit_id = self.last_commit
            self.base_sha = self.pr.base.sha if self.pr.base.sha else ""
            self.base_ref = self.pr.base.ref if self.pr.base.ref else ""
        elif "issues" in url:
            self.issue_url = url
            self.__set_repo_and_owner_from_issue()
            self.enabled_issue = True

    def __add_file_content(self):
        """
        Fill ``self.file_contents`` with the content of every changed file at ``self.sha``.

        Entries of ``self.git_files`` rejected by ``is_valid_file`` are skipped, as are entries
        without a filename (and everything, when ``self.sha`` is falsy). A file whose fetch raises
        ``ApiException`` is recorded with an empty string.
        """
        for changed in self.git_files:
            path = changed.get("filename")
            # Ignore file from default settings
            if not is_valid_file(path):
                continue
            if not (path and self.sha):
                continue

            try:
                fetched = self.repo_api.get_file_content(
                    owner=self.owner,
                    repo=self.repo,
                    commit_sha=self.sha,
                    filepath=path
                )
            except ApiException as e:
                self.logger.error(f"Error getting file content for {path}: {str(e)}")
                fetched = ""
            self.file_contents[path] = fetched

    def __add_file_diff(self):
        try:
            diff_contents = self.repo_api.get_pull_request_diff(
                    owner=self.owner,
                    repo=self.repo,
                    pr_number=self.pr_number
            )

            lines = diff_contents.splitlines()
            current_file = None
            current_patch = []
            file_patches = {}
            for line in lines:
                if line.startswith('diff --git'):
                    if current_file and current_patch:
                        file_patches[current_file] = '\n'.join(current_patch)
                        current_patch = []
                    current_file = line.split(' b/')[-1]
                elif line.startswith('@@'):
                    current_patch = [line]
                elif current_patch:
                    current_patch.append(line)

            if current_file and current_patch:
                file_patches[current_file] = '\n'.join(current_patch)

            self.file_diffs = file_patches
        except Exception as e:
            self.logger.error(f"Error getting diff content: {str(e)}")

    def _parse_pr_url(self, pr_url: str) -> Tuple[str, str, int]:
        parsed_url = urlparse(pr_url)

        if parsed_url.path.startswith('/api/v1'):
            parsed_url = urlparse(pr_url.replace("/api/v1", ""))

        path_parts = parsed_url.path.strip('/').split('/')
        if len(path_parts) < 4 or path_parts[2] != 'pulls':
            raise ValueError("The provided URL does not appear to be a Gitea PR URL")

        try:
            pr_number = int(path_parts[3])
        except ValueError as e:
            raise ValueError("Unable to convert PR number to integer") from e

        owner = path_parts[0]
        repo = path_parts[1]

        return owner, repo, pr_number

    def _parse_issue_url(self, issue_url: str) -> Tuple[str, str, int]:
        parsed_url = urlparse(issue_url)

        if parsed_url.path.startswith('/api/v1'):
            parsed_url = urlparse(issue_url.replace("/api/v1", ""))

        path_parts = parsed_url.path.strip('/').split('/')
        if len(path_parts) < 4 or path_parts[2] != 'issues':
            raise ValueError("The provided URL does not appear to be a Gitea issue URL")

        try:
            issue_number = int(path_parts[3])
        except ValueError as e:
            raise ValueError("Unable to convert issue number to integer") from e

        owner = path_parts[0]
        repo = path_parts[1]

        return owner, repo, issue_number

    def __set_repo_and_owner_from_pr(self):
        """Extract owner and repo from the PR URL"""
        try:
            owner, repo, pr_number = self._parse_pr_url(self.pr_url)
            self.owner = owner
            self.repo = repo
            self.pr_number = pr_number
            self.logger.info(f"Owner: {self.owner}, Repo: {self.repo}, PR Number: {self.pr_number}")
        except ValueError as e:
            self.logger.error(f"Error parsing PR URL: {str(e)}")
        except Exception as e:
            self.logger.error(f"Unexpected error: {str(e)}")

    def __set_repo_and_owner_from_issue(self):
        """Extract owner and repo from the issue URL"""
        try:
            owner, repo, issue_number = self._parse_issue_url(self.issue_url)
            self.owner = owner
            self.repo = repo
            self.issue_number = issue_number
            self.logger.info(f"Owner: {self.owner}, Repo: {self.repo}, Issue Number: {self.issue_number}")
        except ValueError as e:
            self.logger.error(f"Error parsing issue URL: {str(e)}")
        except Exception as e:
            self.logger.error(f"Unexpected error: {str(e)}")

    def get_pr_url(self) -> str:
        """Return the stored PR URL (``self.pr_url``)."""
        url = self.pr_url
        return url

    def get_issue_url(self) -> str:
        """Return the stored issue URL (``self.issue_url``)."""
        url = self.issue_url
        return url

    def get_latest_commit_url(self) -> str:
        """Return the ``html_url`` of ``self.last_commit``."""
        latest = self.last_commit
        return latest.html_url

    def get_comment_url(self, comment) -> str:
        """Return the ``html_url`` attribute of the given comment object."""
        url = comment.html_url
        return url

    def publish_persistent_comment(self, pr_comment: str,
                                   initial_header: str,
                                   update_header: bool = True,
                                   name='review',
                                   final_update_message=True):
        """
        Publish ``pr_comment`` in persistent mode by delegating to ``publish_persistent_comment_full``.

        Returns:
            Whatever ``publish_persistent_comment_full`` returns, so callers receive the published or
            updated comment instead of None.
        """
        return self.publish_persistent_comment_full(pr_comment, initial_header, update_header, name,
                                                    final_update_message)

    def publish_comment(self, comment: str,is_temporary: bool = False) -> None:
        """
        Publish a comment on the issue (if issue mode is enabled) or otherwise on the pull request.

        Temporary comments are skipped unless ``config.publish_output_progress`` is set. The text is
        truncated with ``limit_output_characters`` before being sent.

        Returns:
            A dict with keys ``is_temporary``, ``comment`` and ``comment_id`` for a published comment
            (also appended to ``self.comments_list``), or None when nothing was published.
        """
        skip_temporary = is_temporary and not get_settings().config.publish_output_progress
        if skip_temporary:
            get_logger().debug("Skipping publish_comment for temporary comment")
            return None

        # Issue mode takes precedence over PR mode.
        if self.enabled_issue:
            target_index = self.issue_number
        elif self.enabled_pr:
            target_index = self.pr_number
        else:
            self.logger.error("Neither PR nor issue URL provided.")
            return None

        text = self.limit_output_characters(comment, self.max_comment_chars)
        api_response = self.repo_api.create_comment(
            owner=self.owner,
            repo=self.repo,
            index=target_index,
            comment=text
        )
        if not api_response:
            self.logger.error("Failed to publish comment")
            return None

        if is_temporary:
            self.temp_comments.append(text)

        published = dict(
            is_temporary=is_temporary,
            comment=text,
            comment_id=api_response.id,
        )
        self.comments_list.append(published)
        self.logger.info("Comment published")
        return published

    def edit_comment(self, comment, body : str):
        """Replace the text of an existing comment.

        ``comment`` is either a dict carrying a ``comment_id`` key or an object
        exposing ``.id``. The new body is clipped to ``self.max_comment_chars``
        first. Errors are logged rather than raised; the method always returns
        ``None``.
        """
        clipped_body = self.limit_output_characters(body, self.max_comment_chars)
        try:
            if isinstance(comment, dict):
                target_id = comment.get("comment_id")
            else:
                target_id = comment.id
            self.repo_api.edit_comment(
                owner=self.owner,
                repo=self.repo,
                comment_id=target_id,
                comment=clipped_body
            )
        except ApiException as e:
            self.logger.error(f"Error editing comment: {e}")
            return None
        except Exception as e:
            self.logger.error(f"Unexpected error: {e}")
            return None


    def publish_inline_comment(self,body: str, relevant_file: str, relevant_line_in_file: str, original_suggestion=None):
        """Publish a single inline comment anchored to a line of a changed file.

        The line is located in ``self.diff_files`` via
        ``find_line_number_of_relevant_line_in_file``. When no position is found
        (``-1``), an empty payload is forwarded to ``publish_inline_comments``.
        ``original_suggestion`` is accepted but not used here.
        """
        body = self.limit_output_characters(body, self.max_comment_chars)
        lookup_name = relevant_file.strip('`')
        position, absolute_position = find_line_number_of_relevant_line_in_file(
            self.diff_files, lookup_name, relevant_line_in_file,
        )

        line_found = position != -1
        if not line_found:
            get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}")

        path = relevant_file.strip()
        if line_found:
            payload = dict(body=body, path=path, old_position=position,new_position = absolute_position)
        else:
            payload = {}
        self.publish_inline_comments([payload])


    def publish_inline_comments(self, comments: List[Dict[str, Any]],body : str = "Inline comment") -> None:
        """Submit ``comments`` in one ``repo_api.create_inline_comment`` call.

        ``body`` is passed as the top-level text. The commit id is the sha of
        ``self.last_commit`` or an empty string when there is none. The target
        number is the PR number when PR mode is enabled, else the issue number.
        """
        if self.enabled_pr:
            target_number = self.pr_number
        else:
            target_number = self.issue_number
        commit_sha = self.last_commit.sha if self.last_commit else ""

        response = self.repo_api.create_inline_comment(
            owner=self.owner,
            repo=self.repo,
            pr_number=target_number,
            body=body,
            commit_id=commit_sha,
            comments=comments
        )

        if response:
            self.logger.info("Inline comment published")
            return

        self.logger.error("Failed to publish inline comment")

    def publish_code_suggestions(self, suggestions: List[Dict[str, Any]]):
        """Publish each suggestion as its own inline comment.

        Suggestions without a ``body`` are logged and skipped. When an
        ``original_suggestion`` entry is present, its ``relevant_lines_start``
        supplies the old position and its ``suggestion_content`` (if non-empty)
        becomes the title passed to ``publish_inline_comments``.

        NOTE(review): this method returns None; confirm that callers do not
        interpret the result as a success flag.
        """
        for suggestion in suggestions:
            body = suggestion.get("body","")
            if not body:
                self.logger.error("No body provided for the suggestion")
                continue

            has_original = "original_suggestion" in suggestion
            path = suggestion.get("relevant_file","")
            new_position = suggestion.get("relevant_lines_start",0)
            if has_original:
                old_position = suggestion["original_suggestion"].get("relevant_lines_start",0)
                title_body = suggestion["original_suggestion"].get("suggestion_content","")
            else:
                old_position = suggestion.get("relevant_lines_start",0)
                title_body = ""

            payload = dict(body=body, path=path, old_position=old_position,new_position = new_position)
            if not title_body:
                self.publish_inline_comments([payload])
                continue

            title_body = f"**Suggestion:** {title_body}"
            self.publish_inline_comments([payload],title_body)

    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:
        """Add an "eyes" reaction to a comment on the current PR/issue.

        Returns the ``id`` of the API response (its first element when the
        response is a tuple), or ``None`` when reactions are disabled, the
        comment id is not among the listed comments, the API returns a falsy
        response, or any exception is raised (exceptions are logged).
        """
        try:
            if disable_eyes:
                return None

            target_number = self.pr_number if self.enabled_pr else self.issue_number
            existing = self.repo_api.list_all_comments(
                owner=self.owner,
                repo=self.repo,
                index=target_number
            )

            # Only react to comments that actually belong to this PR/issue.
            comment_ids = []
            for existing_comment in existing:
                comment_ids.append(existing_comment.id)
            if issue_comment_id not in comment_ids:
                self.logger.error(f"Comment ID {issue_comment_id} not found. Available IDs: {comment_ids}")
                return None

            response = self.repo_api.add_reaction_comment(
                owner=self.owner,
                repo=self.repo,
                comment_id=issue_comment_id,
                reaction="eyes"
            )

            if not response:
                self.logger.error("Failed to add eyes reaction")
                return None

            if isinstance(response, tuple):
                return response[0].id
            return response.id

        except ApiException as e:
            self.logger.error(f"Error adding eyes reaction: {e}")
            return None
        except Exception as e:
            self.logger.error(f"Unexpected error: {e}")
            return None

    def remove_reaction(self, comment_id: int) -> None:
        """Ask the API to remove the reaction from the given comment.

        Never raises: a falsy response or any exception is only logged.
        """
        try:
            outcome = self.repo_api.remove_reaction_comment(
                owner=self.owner,
                repo=self.repo,
                comment_id=comment_id
            )
            if outcome:
                return
            self.logger.error("Failed to remove reaction")
        except ApiException as e:
            self.logger.error(f"Error removing reaction: {e}")
        except Exception as e:
            self.logger.error(f"Unexpected error: {e}")

    def get_commit_messages(self)-> str:
        """Return the PR's commit messages concatenated into one string.

        Messages are joined with no separator and, when the
        ``CONFIG.MAX_COMMITS_TOKENS`` setting is truthy, clipped with
        ``clip_tokens``. Returns ``""`` when commits cannot be fetched, none
        carry a message, or processing fails (failures are logged).
        """
        max_tokens = get_settings().get("CONFIG.MAX_COMMITS_TOKENS", None)
        pr_commits = self.repo_api.get_pr_commits(
            owner=self.owner,
            repo=self.repo,
            pr_number=self.pr_number
        )

        if not pr_commits:
            self.logger.error("Failed to get commit messages")
            return ""

        try:
            messages = []
            for commit in pr_commits:
                if commit:
                    messages.append(commit["commit"]["message"])

            if not messages:
                self.logger.error("No commit messages found")
                return ""

            joined = "".join(messages)
            return clip_tokens(joined, max_tokens) if max_tokens else joined
        except Exception as e:
            self.logger.error(f"Error processing commit messages: {str(e)}")
            return ""

    def _get_file_content_from_base(self, filename: str) -> str:
        return self.repo_api.get_file_content(
            owner=self.owner,
            repo=self.repo,
            commit_sha=self.base_sha,
            filepath=filename
        )

    def _get_file_content_from_latest_commit(self, filename: str) -> str:
        return self.repo_api.get_file_content(
            owner=self.owner,
            repo=self.repo,
            commit_sha=self.last_commit.sha,
            filepath=filename
        )

    def get_diff_files(self) -> List[FilePatchInfo]:
        """Build (and cache) a ``FilePatchInfo`` for every valid changed file.

        A non-empty ``self.diff_files`` is returned as-is. Otherwise each entry
        of ``self.git_files`` is processed: entries without a filename are
        ignored, and names rejected by ``is_valid_file`` are collected and
        logged. Once the count of valid files reaches
        ``MAX_FILES_ALLOWED_FULL`` (non-incremental runs with a patch only),
        full file contents are no longer loaded and only the patch is kept.
        """
        if self.diff_files:
            return self.diff_files

        rejected_names = []
        valid_seen = 0
        collected = []
        for entry in self.git_files:
            filename = entry.get("filename")
            if not filename:
                continue

            if not is_valid_file(filename):
                rejected_names.append(filename)
                continue

            valid_seen += 1
            patch = self.file_diffs.get(filename,"")

            # Past the limit we keep only the patch to avoid fetching full files.
            skip_content = False
            if valid_seen >= MAX_FILES_ALLOWED_FULL and patch and not self.incremental.is_incremental:
                skip_content = True
                if valid_seen == MAX_FILES_ALLOWED_FULL:
                    self.logger.info("Too many files in PR, will avoid loading full content for rest of files")

            # Head content comes from the contents already collected for this PR.
            head_file = "" if skip_content else self.file_contents.get(filename,"")

            if self.incremental.is_incremental and self.unreviewed_files_set:
                base_file = self._get_file_content_from_latest_commit(filename)
                self.unreviewed_files_set[filename] = patch
            elif skip_content:
                base_file = ""
            else:
                base_file = self._get_file_content_from_base(filename)

            status = entry.get("status","")
            if status == 'added':
                edit_type = EDIT_TYPE.ADDED
            elif status in ('removed', 'deleted'):
                edit_type = EDIT_TYPE.DELETED
            elif status == 'renamed':
                edit_type = EDIT_TYPE.RENAMED
            elif status in ('modified', 'changed'):
                edit_type = EDIT_TYPE.MODIFIED
            else:
                self.logger.error(f"Unknown edit type: {status}")
                edit_type = EDIT_TYPE.UNKNOWN

            collected.append(FilePatchInfo(
                base_file=base_file,
                head_file=head_file,
                patch=patch,
                filename=filename,
                num_minus_lines=entry.get("deletions",0),
                num_plus_lines=entry.get("additions",0),
                edit_type=edit_type
            ))

        if rejected_names:
            self.logger.info(f"Filtered out files with invalid extensions: {rejected_names}")

        self.diff_files = collected
        return collected

    def get_line_link(self, relevant_file, relevant_line_start, relevant_line_end = None) -> str:
        """Build a web link to a file (or a line / line range) on the PR branch.

        A start of ``-1`` links to the whole file; a truthy ``relevant_line_end``
        produces a ``#L<start>-L<end>`` anchor; otherwise ``#L<start>`` is used.
        The generated link is logged at info level.
        """
        file_link = f"{self.base_url}/{self.owner}/{self.repo}/src/branch/{self.get_pr_branch()}/{relevant_file}"
        if relevant_line_start == -1:
            anchor = ""
        elif relevant_line_end:
            anchor = f"#L{relevant_line_start}-L{relevant_line_end}"
        else:
            anchor = f"#L{relevant_line_start}"
        link = file_link + anchor

        self.logger.info(f"Generated link: {link}")
        return link

    def get_pr_id(self):
        """Return an identifier of the form ``"<repo>/<pr_number>"``.

        Returns ``""`` if the identifier cannot be built. Only ``Exception``
        subclasses are swallowed, so ``KeyboardInterrupt`` and ``SystemExit``
        still propagate (the previous bare ``except`` suppressed them too).
        """
        try:
            return f"{self.repo}/{self.pr_number}"
        except Exception:
            return ""

    def get_files(self) -> List[str]:
        """Return the filename of every entry in ``self.git_files``.

        Entries without a ``filename`` key contribute an empty string. The
        return annotation now reflects that names (strings) are returned, not
        the file dicts themselves.
        """
        return [file.get("filename","") for file in self.git_files]

    def get_num_of_files(self) -> int:
        """Return how many entries ``self.git_files`` holds."""
        file_count = len(self.git_files)
        return file_count

    def get_issue_comments(self) -> List[Dict[str, Any]]:
        """List the comments of the current issue (issue mode) or PR.

        Returns whatever ``repo_api.list_all_comments`` yields, or ``[]`` (with
        an error logged) when that result is falsy.
        """
        if self.enabled_issue:
            index = self.issue_number
        else:
            index = self.pr_number

        comments = self.repo_api.list_all_comments(
            owner=self.owner,
            repo=self.repo,
            index=index
        )
        if comments:
            return comments

        self.logger.error("Failed to get comments")
        return []

    def get_languages(self) -> Set[str]:
        """Return the result of ``repo_api.get_languages`` for this repository, unchanged."""
        return self.repo_api.get_languages(owner=self.owner, repo=self.repo)

    def get_pr_branch(self) -> str:
        """Return the head ref of the PR.

        Returns ``""`` when there is no PR object or no head (an error is
        logged in both cases), or when the head's ref is falsy.
        """
        pull = self.pr
        if not pull:
            self.logger.error("Failed to get PR branch")
            return ""

        head = pull.head
        if not head:
            self.logger.error("PR head not found")
            return ""

        return head.ref or ""

    def get_pr_description_full(self) -> str:
        """Return the PR body, or ``""`` when there is no PR object or the body is falsy."""
        pull = self.pr
        if pull:
            return pull.body or ""

        self.logger.error("Failed to get PR description")
        return ""

    def get_pr_labels(self,update=False) -> List[str]:
        """Return the names of the labels attached to the PR.

        With ``update`` falsy the labels already present on ``self.pr`` are
        used; otherwise they are fetched through ``repo_api.get_issue_labels``.
        An empty/falsy label collection is logged as an error and yields ``[]``.
        """
        if update:
            labels = self.repo_api.get_issue_labels(
                owner=self.owner,
                repo=self.repo,
                issue_number=self.pr_number
            )
        else:
            labels = self.pr.labels

        if not labels:
            self.logger.error("Failed to get PR labels")
            return []

        names = []
        for label in labels:
            names.append(label.name)
        return names

    def get_repo_settings(self) -> str:
        """Fetch the content of the settings file named by ``self.repo_settings``.

        The file is read at ``self.sha``. Returns ``""`` (and logs an error)
        when no settings path is configured or the fetched content is falsy.
        """
        settings_path = self.repo_settings
        if not settings_path:
            self.logger.error("Repository settings not found")
            return ""

        content = self.repo_api.get_file_content(
            owner=self.owner,
            repo=self.repo,
            commit_sha=self.sha,
            filepath=settings_path
        )
        if content:
            return content

        self.logger.error("Failed to get repository settings")
        return ""

    def get_user_id(self) -> str:
        """Return ``self.pr.user.id`` rendered as a string, or ``""`` when no PR is loaded."""
        if not self.pr:
            return ""
        return f"{self.pr.user.id}"

    def is_supported(self, capability) -> bool:
        """Report every capability as supported; ``capability`` is not inspected."""
        supported = True
        return supported

    def get_git_repo_url(self, issues_or_pr_url: str) -> str:
        """Return ``<base_url>/<owner>/<repo>.git``; the URL argument is not used."""
        parts = (self.base_url, self.owner, self.repo)
        return "{}/{}/{}.git".format(*parts)

    def publish_description(self, pr_title: str, pr_body: str) -> None:
        """Update the title and body of the PR through ``repo_api.edit_pull_request``.

        On success in PR mode, ``self.pr`` is re-fetched so it reflects the new
        description. A falsy API response is logged as an error.
        """
        target_number = self.pr_number if self.enabled_pr else self.issue_number
        response = self.repo_api.edit_pull_request(
            owner=self.owner,
            repo=self.repo,
            pr_number=target_number,
            title=pr_title,
            body=pr_body
        )

        if not response:
            self.logger.error("Failed to publish PR description")
            return None

        self.logger.info("PR description published successfully")
        if not self.enabled_pr:
            return None

        self.pr = self.repo_api.get_pull_request(
            owner=self.owner,
            repo=self.repo,
            pr_number=self.pr_number
        )

    def publish_labels(self, labels: List[int]) -> None:
        """Attach ``labels`` to the current PR (or issue when PR mode is off).

        An empty ``labels`` value is logged as an error and nothing is sent.
        Success is logged only when the API response is truthy.
        """
        if not labels:
            self.logger.error("No labels provided to publish")
            return None

        target_number = self.pr_number if self.enabled_pr else self.issue_number
        response = self.repo_api.add_labels(
            owner=self.owner,
            repo=self.repo,
            issue_number=target_number,
            labels=labels
        )

        if not response:
            return None
        self.logger.info("Labels added successfully")

    def remove_comment(self, comment) -> None:
        """Delete a comment through the API and drop it from ``self.comments_list``.

        ``comment`` is either a dict with a ``comment_id`` key or an object
        exposing ``.id``; a falsy ``comment`` is ignored. An ``ApiException``
        is logged and re-raised.
        """
        if not comment:
            return

        try:
            if isinstance(comment, dict):
                comment_id = comment.get("comment_id")
            else:
                comment_id = comment.id
            if not comment_id:
                self.logger.error("Comment ID not found")
                return None

            self.repo_api.remove_comment(
                owner=self.owner,
                repo=self.repo,
                comment_id=comment_id
            )

            tracked = self.comments_list
            if tracked and comment in tracked:
                tracked.remove(comment)

            self.logger.info(f"Comment removed successfully: {comment}")
        except ApiException as e:
            self.logger.error(f"Error removing comment: {e}")
            raise e

    def remove_initial_comment(self) -> None:
        """Remove every temporary comment recorded in ``self.comments_list``.

        Failures are logged per comment and do not stop the loop.
        """
        # Iterate over a snapshot: remove_comment() deletes entries from
        # self.comments_list, and mutating the list while iterating it directly
        # makes the loop skip the element that follows each removed comment.
        for comment in list(self.comments_list):
            try:
                if not comment.get("is_temporary"):
                    continue
                self.remove_comment(comment)
            except Exception as e:
                self.logger.error(f"Error removing comment: {e}")
                continue
            self.logger.info(f"Removed initial comment: {comment.get('comment_id')}")

    #Clone related
    def _prepare_clone_url_with_token(self, repo_url_to_clone: str) -> str | None:
        #For example, to clone:
        #https://github.com/Codium-ai/pr-agent-pro.git
        #Need to embed inside the github token:
        #https://<token>@github.com/Codium-ai/pr-agent-pro.git

        gitea_token = self.gitea_access_token
        gitea_base_url = self.base_url
        scheme = gitea_base_url.split("://")[0]
        scheme += "://"
        if not all([gitea_token, gitea_base_url]):
            get_logger().error("Either missing auth token or missing base url")
            return None
        base_url = gitea_base_url.split(scheme)[1]
        if not base_url:
            get_logger().error(f"Base url: {gitea_base_url} has an empty base url")
            return None
        if base_url not in repo_url_to_clone:
            get_logger().error(f"url to clone: {repo_url_to_clone} does not contain {base_url}")
            return None
        repo_full_name = repo_url_to_clone.split(base_url)[-1]
        if not repo_full_name:
            get_logger().error(f"url to clone: {repo_url_to_clone} is malformed")
            return None

        clone_url = scheme
        clone_url += f"{gitea_token}@{base_url}{repo_full_name}"
        return clone_url

class RepoApi(giteapy.RepositoryApi):
    """Convenience wrapper around the giteapy clients used by the provider.

    Calls that the generated giteapy clients cover are delegated to
    ``self.repository`` / ``self.issue``; the remaining endpoints are invoked
    through ``self.api_client.call_api`` directly.
    """

    def __init__(self, client: giteapy.ApiClient):
        self.repository = giteapy.RepositoryApi(client)
        self.issue = giteapy.IssueApi(client)
        self.logger = get_logger()
        super().__init__(client)

    def _raw_get(self, url: str, query_params=None):
        """Issue an authenticated GET with deserialisation and content preloading disabled."""
        return self.api_client.call_api(
            url,
            'GET',
            path_params={},
            query_params=query_params,
            response_type=None,
            _return_http_data_only=False,
            _preload_content=False,
            auth_settings=['AuthorizationHeaderToken']
        )

    @staticmethod
    def _read_text(response) -> Optional[str]:
        """Read and UTF-8 decode a raw response.

        Handles both an object exposing ``.data`` and a tuple whose first
        element is the stream. Returns ``None`` for any other shape.
        """
        if hasattr(response, 'data'):
            stream = response.data
        elif isinstance(response, tuple):
            stream = response[0]
        else:
            return None
        return stream.read().decode('utf-8')

    def create_inline_comment(self, owner: str, repo: str, pr_number: int, body : str ,commit_id : str, comments: List[Dict[str, Any]]):
        """POST a review carrying ``comments`` to the pull request's reviews endpoint."""
        # NOTE(review): response_type='Repository' is kept as-is; confirm the
        # caller only relies on the truthiness of the result.
        body = {
            "body": body,
            "comments": comments,
            "commit_id": commit_id,
        }
        return self.api_client.call_api(
            '/repos/{owner}/{repo}/pulls/{pr_number}/reviews',
            'POST',
            path_params={'owner': owner, 'repo': repo, 'pr_number': pr_number},
            body=body,
            response_type='Repository',
            auth_settings=['AuthorizationHeaderToken']
        )

    def create_comment(self, owner: str, repo: str, index: int, comment: str):
        """Create a comment on the issue/PR with the given index."""
        body = {
            "body": comment
        }
        return self.issue.issue_create_comment(
            owner=owner,
            repo=repo,
            index=index,
            body=body
        )

    def edit_comment(self, owner: str, repo: str, comment_id: int, comment: str):
        """Replace the text of an existing comment."""
        body = {
            "body": comment
        }
        return self.issue.issue_edit_comment(
            owner=owner,
            repo=repo,
            id=comment_id,
            body=body
        )

    def remove_comment(self, owner: str, repo: str, comment_id: int):
        """Delete a comment by id."""
        return self.issue.issue_delete_comment(
            owner=owner,
            repo=repo,
            id=comment_id
        )

    def list_all_comments(self, owner: str, repo: str, index: int):
        """List the comments of the issue/PR with the given index."""
        return self.issue.issue_get_comments(
            owner=owner,
            repo=repo,
            index=index
        )

    def get_pull_request_diff(self, owner: str, repo: str, pr_number: int) -> str:
        """Get the diff content of a pull request using direct API call.

        Raises:
            RuntimeError: when the response has an unrecognised shape.
            ApiException / Exception: logged and re-raised.
        """
        try:
            response = self._raw_get(f'/repos/{owner}/{repo}/pulls/{pr_number}.diff')
            text = self._read_text(response)
            if text is None:
                error_msg = f"Unexpected response format received from API: {type(response)}"
                self.logger.error(error_msg)
                raise RuntimeError(error_msg)
            return text

        except ApiException as e:
            self.logger.error(f"Error getting diff: {str(e)}")
            raise e
        except Exception as e:
            self.logger.error(f"Unexpected error: {str(e)}")
            raise e

    def get_pull_request(self, owner: str, repo: str, pr_number: int):
        """Get pull request details including description"""
        return self.repository.repo_get_pull_request(
            owner=owner,
            repo=repo,
            index=pr_number
        )

    def edit_pull_request(self, owner: str, repo: str, pr_number: int,title : str, body: str):
        """Edit pull request description"""
        body = {
            "body": body,
            "title" : title
        }
        return self.repository.repo_edit_pull_request(
            owner=owner,
            repo=repo,
            index=pr_number,
            body=body
        )

    def get_change_file_pull_request(self, owner: str, repo: str, pr_number: int):
        """Get changed files in the pull request.

        Returns the parsed JSON payload, or ``[]`` on an unrecognised response
        or any error (errors are logged).
        """
        try:
            response = self._raw_get(f'/repos/{owner}/{repo}/pulls/{pr_number}/files')
            text = self._read_text(response)
            if text is None:
                return []
            return json.loads(text)

        except ApiException as e:
            self.logger.error(f"Error getting changed files: {e}")
            return []
        except Exception as e:
            self.logger.error(f"Unexpected error: {e}")
            return []

    def get_languages(self, owner: str, repo: str):
        """Get programming languages used in the repository.

        Returns the parsed JSON payload, or ``{}`` on an unrecognised response
        or any error (errors are logged).
        """
        try:
            response = self._raw_get(f'/repos/{owner}/{repo}/languages')
            text = self._read_text(response)
            if text is None:
                return {}
            return json.loads(text)

        except ApiException as e:
            self.logger.error(f"Error getting languages: {e}")
            return {}
        except Exception as e:
            self.logger.error(f"Unexpected error: {e}")
            return {}

    def get_file_content(self, owner: str, repo: str, commit_sha: str, filepath: str) -> str:
        """Get raw file content from a specific commit.

        ``commit_sha`` is sent as the ``ref`` query parameter when truthy.
        Returns ``""`` on an unrecognised response or any error (errors are
        logged).
        """
        try:
            query_params = []
            if commit_sha:
                query_params.append(('ref', commit_sha))

            response = self._raw_get(f'/repos/{owner}/{repo}/raw/{filepath}', query_params)
            text = self._read_text(response)
            return "" if text is None else text

        except ApiException as e:
            self.logger.error(f"Error getting file: {filepath}, content: {e}")
            return ""
        except Exception as e:
            self.logger.error(f"Unexpected error: {e}")
            return ""

    def get_issue_labels(self, owner: str, repo: str, issue_number: int):
        """Get labels assigned to the issue"""
        return self.issue.issue_get_labels(
            owner=owner,
            repo=repo,
            index=issue_number
        )

    def list_all_commits(self, owner: str, repo: str):
        """List the commits of the repository."""
        return self.repository.repo_get_all_commits(
            owner=owner,
            repo=repo
        )

    def add_reviewer(self, owner: str, repo: str, pr_number: int, reviewers: List[str]):
        """Request reviews from ``reviewers`` on the pull request."""
        body = {
            "reviewers": reviewers
        }
        return self.api_client.call_api(
            '/repos/{owner}/{repo}/pulls/{pr_number}/requested_reviewers',
            'POST',
            path_params={'owner': owner, 'repo': repo, 'pr_number': pr_number},
            body=body,
            response_type='Repository',
            auth_settings=['AuthorizationHeaderToken']
        )

    def add_reaction_comment(self, owner: str, repo: str, comment_id: int, reaction: str):
        """Add ``reaction`` to the comment with the given id."""
        body = {
            "content": reaction
        }
        return self.api_client.call_api(
            '/repos/{owner}/{repo}/issues/comments/{id}/reactions',
            'POST',
            path_params={'owner': owner, 'repo': repo, 'id': comment_id},
            body=body,
            response_type='Repository',
            auth_settings=['AuthorizationHeaderToken']
        )

    def remove_reaction_comment(self, owner: str, repo: str, comment_id: int, reaction: str = "eyes"):
        """Remove ``reaction`` from the comment with the given id.

        The reaction to delete is sent in the request body, mirroring
        ``add_reaction_comment``; the request previously carried no body.
        ``reaction`` defaults to ``"eyes"``, the only reaction this module adds.
        """
        body = {
            "content": reaction
        }
        return self.api_client.call_api(
            '/repos/{owner}/{repo}/issues/comments/{id}/reactions',
            'DELETE',
            path_params={'owner': owner, 'repo': repo, 'id': comment_id},
            body=body,
            response_type='Repository',
            auth_settings=['AuthorizationHeaderToken']
        )

    def add_labels(self, owner: str, repo: str, issue_number: int, labels: List[int]):
        """Add ``labels`` to the issue/PR with the given number."""
        body = {
            "labels": labels
        }
        return self.issue.issue_add_label(
            owner=owner,
            repo=repo,
            index=issue_number,
            body=body
        )

    def get_pr_commits(self, owner: str, repo: str, pr_number: int):
        """Get all commits in a pull request.

        Returns the parsed JSON payload, or ``[]`` on an unrecognised response
        or any error (errors are logged).
        """
        try:
            response = self._raw_get(f'/repos/{owner}/{repo}/pulls/{pr_number}/commits')
            text = self._read_text(response)
            if text is None:
                return []
            return json.loads(text)

        except ApiException as e:
            self.logger.error(f"Error getting PR commits: {e}")
            return []
        except Exception as e:
            self.logger.error(f"Unexpected error: {e}")
            return []


================================================
FILE: pr_agent/git_providers/github_provider.py
================================================
import copy
import difflib
import hashlib
import itertools
import re
import time
import traceback
import json
from datetime import datetime
from typing import Optional, Tuple
from urllib.parse import urlparse

from github.Issue import Issue
from github import AppAuthentication, Auth, Github, GithubException
from retry import retry
from starlette_context import context

from ..algo.file_filter import filter_ignored
from ..algo.git_patch_processing import extract_hunk_headers
from ..algo.language_handler import is_valid_file
from ..algo.types import EDIT_TYPE
from ..algo.utils import (PRReviewHeader, Range, clip_tokens,
                          find_line_number_of_relevant_line_in_file,
                          load_large_diff, set_file_languages)
from ..config_loader import get_settings
from ..log import get_logger
from ..servers.utils import RateLimitExceeded
from .git_provider import (MAX_FILES_ALLOWED_FULL, FilePatchInfo, GitProvider,
                           IncrementalPR)


class GithubProvider(GitProvider):
    def __init__(self, pr_url: Optional[str] = None):
        """
        Create a GitHub provider, optionally bound to a pull request or an issue.

        Args:
            pr_url: A URL containing 'pull' (treated as a pull request), a URL containing
                    'issue' (treated as an issue), or None / anything else for a provider
                    without PR or issue context.
        """
        self.repo_obj = None
        try:
            self.installation_id = context.get("installation_id", None)
        except Exception:
            # The context lookup failed (presumably no active request context) - proceed without an id.
            self.installation_id = None
        self.max_comment_chars = 65000
        self.base_url = get_settings().get("GITHUB.BASE_URL", "https://api.github.com").rstrip("/") # default: "https://api.github.com"
        # For a base url containing "api/", the html url is whatever precedes it; otherwise use public github.com
        self.base_url_html = self.base_url.split("api/")[0].rstrip("/") if "api/" in self.base_url else "https://github.com"
        self.github_client = self._get_github_client()
        self.repo = None
        self.pr_num = None
        self.pr = None
        self.issue_main = None
        self.github_user_id = None
        self.diff_files = None
        self.git_files = None
        self.incremental = IncrementalPR(False)
        # Initialise these on every construction path (PR, issue, no context), so later reads such as
        # `if not self.pr_commits` never fail with AttributeError.
        self.pr_commits = None
        self.last_commit_id = None
        if pr_url and 'pull' in pr_url:
            self.set_pr(pr_url)
            self.pr_commits = list(self.pr.get_commits())
            self.last_commit_id = self.pr_commits[-1]
            self.pr_url = self.get_pr_url() # pr_url for github actions can be as api.github.com, so we need to get the url from the pr object
        elif pr_url and 'issue' in pr_url: #url is an issue
            self.issue_main = self._get_issue_handle(pr_url)
        # else: instantiated the provider without a PR / Issue - nothing more to set up

    def _get_issue_handle(self, issue_url) -> Optional[Issue]:
        """
        Resolve an issue URL into an issue object.

        Returns:
            The issue fetched from the repository, or None (after logging) when the URL cannot be
            parsed, the repository handle is falsy, or any exception is raised along the way.
        """
        repo_name, issue_number = self._parse_issue_url(issue_url)
        if not (repo_name and issue_number):
            get_logger().error(f"Given url: {issue_url} is not a valid issue.")
            return None

        try:
            repository = self.github_client.get_repo(repo_name)
            if repository:
                # Valid repo handle - fetch the issue itself
                return repository.get_issue(issue_number)
            get_logger().error(f"Given url: {issue_url}, belonging to owner/repo: {repo_name} does "
                               f"not have a valid repository: {self.get_git_repo_url(issue_url)}")
            return None
        except Exception:
            get_logger().exception(f"Failed to get an issue object for issue: {issue_url}, belonging to owner/repo: {repo_name}")
            return None

    def get_incremental_commits(self, incremental=IncrementalPR(False)):
        """
        Store the incremental-review settings and, when incremental mode is on, reset the
        unreviewed-files mapping and collect the commits added since the previous review.
        """
        self.incremental = incremental
        if not self.incremental.is_incremental:
            return
        self.unreviewed_files_set = {}
        self._get_incremental_commits()

    def is_supported(self, capability: str) -> bool:
        """Report whether `capability` is supported; this provider answers True for every capability."""
        return True

    def _get_owner_and_repo_path(self, given_url: str) -> str:
        try:
            repo_path = None
            if 'issues' in given_url:
                repo_path, _ = self._parse_issue_url(given_url)
            elif 'pull' in given_url:
                repo_path, _ = self._parse_pr_url(given_url)
            elif given_url.endswith('.git'):
                parsed_url = urlparse(given_url)
                repo_path = (parsed_url.path.split('.git')[0])[1:] # /<owner>/<repo>.git -> <owner>/<repo>
            if not repo_path:
                get_logger().error(f"url is neither an issues url nor a PR url nor a valid git url: {given_url}. Returning empty result.")
                return ""
            return repo_path
        except Exception as e:
            get_logger().exception(f"unable to parse url: {given_url}. Returning empty result.")
            return ""

    def get_git_repo_url(self, issues_or_pr_url: str) -> str:
        repo_path = self._get_owner_and_repo_path(issues_or_pr_url) #Return: <OWNER>/<REPO>
        if not repo_path or repo_path not in issues_or_pr_url:
            get_logger().error(f"Unable to retrieve owner/path from url: {issues_or_pr_url}")
            return ""
        return f"{self.base_url_html}/{repo_path}.git" #https://github.com / <OWNER>/<REPO>.git

    # Given a git repo url, return prefix and suffix of the provider in order to view a given file belonging to that repo.
    # Example: https://github.com/qodo-ai/pr-agent.git and branch: v0.8 -> prefix: "https://github.com/qodo-ai/pr-agent/blob/v0.8", suffix: ""
    # In case git url is not provided, provider will use PR context (which includes branch) to determine the prefix and suffix.
    def get_canonical_url_parts(self, repo_git_url:str, desired_branch:str) -> Tuple[str, str]:
        owner = None
        repo = None
        scheme_and_netloc = None

        if repo_git_url or self.issue_main: #Either user provided an external git url, which may be different than what this provider was initialized with, or an issue:
            desired_branch = desired_branch if repo_git_url else self.issue_main.repository.default_branch
            html_url = repo_git_url if repo_git_url else self.issue_main.html_url
            parsed_git_url = urlparse(html_url)
            scheme_and_netloc = parsed_git_url.scheme + "://" + parsed_git_url.netloc
            repo_path = self._get_owner_and_repo_path(html_url)
            if repo_path.count('/') == 1: #Has to have the form <owner>/<repo>
                owner, repo = repo_path.split('/')
            else:
                get_logger().error(f"Invalid repo_path: {repo_path} from url: {html_url}")
                return ("", "")

        if (not owner or not repo) and self.repo: #"else" - User did not provide an external git url, or not an issue, use self.repo object
            owner, repo = self.repo.split('/')
            scheme_and_netloc = self.base_url_html
            desired_branch = self.repo_obj.default_branch
        if not all([scheme_and_netloc, owner, repo]): #"else": Not invoked from a PR context,but no provided git url for context
            get_logger().error(f"Unable to get canonical url parts since missing context (PR or explicit git url)")
            return ("", "")

        prefix = f"{scheme_and_netloc}/{owner}/{repo}/blob/{desired_branch}"
        suffix = ""  # github does not add a suffix
        return (prefix, suffix)

    def get_pr_url(self) -> str:
        return self.pr.html_url

    def set_pr(self, pr_url: str):
        self.repo, self.pr_num = self._parse_pr_url(pr_url)
        self.pr = self._get_pr()

    def _get_incremental_commits(self):
        """
        Collect the files touched by commits newer than the previous review.

        When no previous review comment is found, incremental mode is switched off so the whole
        PR is reviewed. Commits whose message starts with "Merge branch '<default branch>'" are skipped.
        """
        if not self.pr_commits:
            self.pr_commits = list(self.pr.get_commits())

        self.previous_review = self.get_previous_review(full=True, incremental=True)
        if not self.previous_review:
            get_logger().info("No previous review found, will review the entire PR")
            self.incremental.is_incremental = False
            return

        self.incremental.commits_range = self.get_commit_range()
        # Gather every file changed within the commit range
        for new_commit in self.incremental.commits_range:
            is_merge_from_default = new_commit.commit.message.startswith(
                f"Merge branch '{self._get_repo().default_branch}'")
            if is_merge_from_default:
                get_logger().info(f"Skipping merge commit {new_commit.commit.message}")
                continue
            self.unreviewed_files_set.update({changed.filename: changed for changed in new_commit.files})

    def get_commit_range(self):
        last_review_time = self.previous_review.created_at
        first_new_commit_index = None
        for index in range(len(self.pr_commits) - 1, -1, -1):
            if self.pr_commits[index].commit.author.date > last_review_time:
                self.incremental.first_new_commit = self.pr_commits[index]
                first_new_commit_index = index
            else:
                self.incremental.last_seen_commit = self.pr_commits[index]
                break
        return self.pr_commits[first_new_commit_index:] if first_new_commit_index is not None else []

    def get_previous_review(self, *, full: bool, incremental: bool):
        if not (full or incremental):
            raise ValueError("At least one of full or incremental must be True")
        if not getattr(self, "comments", None):
            self.comments = list(self.pr.get_issue_comments())
        prefixes = []
        if full:
            prefixes.append(PRReviewHeader.REGULAR.value)
        if incremental:
            prefixes.append(PRReviewHeader.INCREMENTAL.value)
        for index in range(len(self.comments) - 1, -1, -1):
            if any(self.comments[index].body.startswith(prefix) for prefix in prefixes):
                return self.comments[index]

    def get_files(self):
        """
        Return the files of the pull request.

        In incremental mode with a non-empty unreviewed-files mapping, the values of that mapping
        are returned. Otherwise the file list comes from the "git_files" entry of the request
        context when present, and is fetched from the PR (and stored in both the context and
        `self.git_files`) when not. If anything in that path raises, the instance-level
        `self.git_files` is used, fetching it from the PR if it is still empty.
        """
        if self.incremental.is_incremental and self.unreviewed_files_set:
            return self.unreviewed_files_set.values()
        try:
            git_files = context.get("git_files", None)
            if git_files:
                # Keep the instance cache in sync with the context cache; otherwise
                # get_num_of_files() would see self.git_files == None and report -1.
                self.git_files = git_files
                return git_files
            self.git_files = list(self.pr.get_files()) # 'list' to handle pagination
            context["git_files"] = self.git_files
            return self.git_files
        except Exception:
            # Fall back to the instance cache when the block above raised
            if not self.git_files:
                self.git_files = list(self.pr.get_files())
            return self.git_files

    def get_num_of_files(self):
        if hasattr(self.git_files, "totalCount"):
            return self.git_files.totalCount
        else:
            try:
                return len(self.git_files)
            except Exception as e:
                return -1

    @retry(exceptions=RateLimitExceeded,
           tries=get_settings().github.ratelimit_retries, delay=2, backoff=2, jitter=(1, 3))
    def get_diff_files(self) -> list[FilePatchInfo]:
        """
        Retrieves the list of files that have been modified, added, deleted, or renamed in a pull request in GitHub,
        along with their content and patch information.

        Results are cached: a non-empty "diff_files" entry in the request context, or a non-empty
        `self.diff_files`, is returned as-is without contacting GitHub again.

        Returns:
            diff_files (List[FilePatchInfo]): List of FilePatchInfo objects representing the modified, added, deleted,
            or renamed files in the merge request.

        Raises:
            RateLimitExceeded: Raised for ANY exception that escapes the body (not only rate-limit errors);
            this is the exception type the `retry` decorator above is configured with.
        """
        try:
            # First cache level: the request context (may be unavailable, hence the guarded lookup)
            try:
                diff_files = context.get("diff_files", None)
                if diff_files:
                    return diff_files
            except Exception:
                pass

            # Second cache level: the provider instance
            if self.diff_files:
                return self.diff_files

            # filter files using [ignore] patterns
            files_original = self.get_files()
            files = filter_ignored(files_original)
            if files_original != files:
                # Logging only; failures while building the log payload are ignored
                try:
                    names_original = [file.filename for file in files_original]
                    names_new = [file.filename for file in files]
                    get_logger().info(f"Filtered out [ignore] files for pull request:", extra=
                    {"files": names_original,
                     "filtered_files": names_new})
                except Exception:
                    pass

            diff_files = []
            invalid_files_names = []
            # Never set to True within this method, so the content-skipping branch below is currently inactive
            is_close_to_rate_limit = False

            # The base.sha will point to the current state of the base branch (including parallel merges), not the original base commit when the PR was created
            # We can fix this by finding the merge base commit between the PR head and base branches
            # Note that The pr.head.sha is actually correct as is - it points to the latest commit in your PR branch.
            # This SHA isn't affected by parallel merges to the base branch since it's specific to your PR's branch.
            repo = self.repo_obj
            pr = self.pr
            try:
                compare = repo.compare(pr.base.sha, pr.head.sha) # communication with GitHub
                merge_base_commit = compare.merge_base_commit
            except Exception as e:
                # Fall back to the PR base when the comparison fails
                get_logger().error(f"Failed to get merge base commit: {e}")
                merge_base_commit = pr.base
            if merge_base_commit.sha != pr.base.sha:
                get_logger().info(
                    f"Using merge base commit {merge_base_commit.sha} instead of base commit ")

            # Counts the valid files seen so far; used to cap how many files get their full content loaded
            counter_valid = 0
            for file in files:
                if not is_valid_file(file.filename):
                    invalid_files_names.append(file.filename)
                    continue

                patch = file.patch
                if is_close_to_rate_limit:
                    new_file_content_str = ""
                    original_file_content_str = ""
                else:
                    # allow only a limited number of files to be fully loaded. We can manage the rest with diffs only
                    counter_valid += 1
                    avoid_load = False
                    # Content loading is skipped only when a patch is available and we are not in incremental mode
                    if counter_valid >= MAX_FILES_ALLOWED_FULL and patch and not self.incremental.is_incremental:
                        avoid_load = True
                        if counter_valid == MAX_FILES_ALLOWED_FULL:
                            # Logged once, when the threshold is first reached
                            get_logger().info(f"Too many files in PR, will avoid loading full content for rest of files")

                    if avoid_load:
                        new_file_content_str = ""
                    else:
                        new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha)  # communication with GitHub

                    if self.incremental.is_incremental and self.unreviewed_files_set:
                        # Incremental mode: diff against the last commit seen by the previous review
                        original_file_content_str = self._get_pr_file_content(file, self.incremental.last_seen_commit_sha)
                        patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str)
                        # Note: the mapping entry for this file is replaced with the computed patch
                        self.unreviewed_files_set[file.filename] = patch
                    else:
                        if avoid_load:
                            original_file_content_str = ""
                        else:
                            original_file_content_str = self._get_pr_file_content(file, merge_base_commit.sha)
                            # original_file_content_str = self._get_pr_file_content(file, self.pr.base.sha)
                        if not patch:
                            # No patch was provided for this file - build one from the two contents
                            patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str)


                # Map the file status string to the internal edit type
                if file.status == 'added':
                    edit_type = EDIT_TYPE.ADDED
                elif file.status == 'removed':
                    edit_type = EDIT_TYPE.DELETED
                elif file.status == 'renamed':
                    edit_type = EDIT_TYPE.RENAMED
                elif file.status == 'modified':
                    edit_type = EDIT_TYPE.MODIFIED
                else:
                    get_logger().error(f"Unknown edit type: {file.status}")
                    edit_type = EDIT_TYPE.UNKNOWN

                # count number of lines added and removed
                if hasattr(file, 'additions') and hasattr(file, 'deletions'):
                    num_plus_lines = file.additions
                    num_minus_lines = file.deletions
                else:
                    # Fallback: count patch lines by their leading '+' / '-' character
                    patch_lines = patch.splitlines(keepends=True)
                    num_plus_lines = len([line for line in patch_lines if line.startswith('+')])
                    num_minus_lines = len([line for line in patch_lines if line.startswith('-')])

                file_patch_canonical_structure = FilePatchInfo(original_file_content_str, new_file_content_str, patch,
                                                               file.filename, edit_type=edit_type,
                                                               num_plus_lines=num_plus_lines,
                                                               num_minus_lines=num_minus_lines,)
                diff_files.append(file_patch_canonical_structure)
            if invalid_files_names:
                get_logger().info(f"Filtered out files with invalid extensions: {invalid_files_names}")

            # Populate both cache levels; the context may be unavailable, so that write is best-effort
            self.diff_files = diff_files
            try:
                context["diff_files"] = diff_files
            except Exception:
                pass

            return diff_files

        except Exception as e:
            # Every failure is surfaced as RateLimitExceeded, which is what the retry decorator listens for
            get_logger().error(f"Failing to get diff files: {e}",
                               artifact={"traceback": traceback.format_exc()})
            raise RateLimitExceeded("Rate limit exceeded for GitHub API.") from e

    def publish_description(self, pr_title: str, pr_body: str):
        self.pr.edit(title=pr_title, body=pr_body)

    def get_latest_commit_url(self) -> str:
        return self.last_commit_id.html_url

    def get_comment_url(self, comment) -> str:
        """Return the `html_url` attribute of the given comment object."""
        comment_url = comment.html_url
        return comment_url

    def publish_persistent_comment(self, pr_comment: str,
                                   initial_header: str,
                                   update_header: bool = True,
                                   name='review',
                                   final_update_message=True):
        self.publish_persistent_comment_full(pr_comment, initial_header, update_header, name, final_update_message)

    def publish_comment(self, pr_comment: str, is_temporary: bool = False):
        if not self.pr and not self.issue_main:
            get_logger().error("Cannot publish a comment if missing PR/Issue context")
            return None

        if is_temporary and not get_settings().config.publish_output_progress:
            get_logger().debug(f"Skipping publish_comment for temporary comment: {pr_comment}")
            return None
        pr_comment = self.limit_output_characters(pr_comment, self.max_comment_chars)

        # In case this is an issue, can publish the comment on the issue.
        if self.issue_main:
            return self.issue_main.create_comment(pr_comment)

        response = self.pr.create_issue_comment(pr_comment)
        if hasattr(response, "user") and hasattr(response.user, "login"):
            self.github_user_id = response.user.login
        response.is_temporary = is_temporary
        if not hasattr(self.pr, 'comments_list'):
            self.pr.comments_list = []
        self.pr.comments_list.append(response)
        return response

    def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, original_suggestion=None):
        body = self.limit_output_characters(body, self.max_comment_chars)
        self.publish_inline_comments([self.create_inline_comment(body, relevant_file, relevant_line_in_file)])


    def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str,
                              absolute_position: int = None):
        """
        Build the dict describing an inline comment on a line of the diff.

        Returns:
            dict(body, path, position) when the line is located in `self.diff_files`;
            an empty dict (after logging) when the position lookup yields -1.
        """
        body = self.limit_output_characters(body, self.max_comment_chars)
        position, absolute_position = find_line_number_of_relevant_line_in_file(self.diff_files,
                                                                                relevant_file.strip('`'),
                                                                                relevant_line_in_file,
                                                                                absolute_position)
        if position == -1:
            # The line was not found, so no line-level comment can be produced
            get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}")
            return {}
        return dict(body=body, path=relevant_file.strip(), position=position)

    def publish_inline_comments(self, comments: list[dict], disable_fallback: bool = False):
        try:
            # publish all comments in a single message
            self.pr.create_review(commit=self.last_commit_id, comments=comments)
        except Exception as e:
            get_logger().info(f"Initially failed to publish inline comments as committable")

            if (getattr(e, "status", None) == 422 and not disable_fallback):
                pass  # continue to try _publish_inline_comments_fallback_with_verification
            else:
                raise e # will end up with publishing the comments one by one

            try:
                self._publish_inline_comments_fallback_with_verification(comments)
            except Exception as e:
                get_logger().error(f"Failed to publish inline code comments fallback, error: {e}")
                raise e    
    
    def get_review_thread_comments(self, comment_id: int) -> list[dict]:
        """
        Retrieves all comments in the same thread as the given comment.
        
        Args:
            comment_id: Review comment ID
                
        Returns:
            List of comments in the same thread
        """
        try:
            # Fetch all comments with a single API call
            all_comments = list(self.pr.get_comments())
            
            # Find the target comment by ID
            target_comment = next((c for c in all_comments if c.id == comment_id), None)
            if not target_comment:
                return []
        
            # Get root comment id
            root_comment_id = target_comment.raw_data.get("in_reply_to_id", target_comment.id)
            # Build the thread - include the root comment and all replies to it
            thread_comments = [
                c for c in all_comments if
                c.id == root_comment_id or c.raw_data.get("in_reply_to_id") == root_comment_id
            ]
        
        
            return thread_comments
                
        except Exception as e:
            get_logger().exception(f"Failed to get review comments for an inline ask command", artifact={"comment_id": comment_id, "error": e})
            return []

    def _publish_inline_comments_fallback_with_verification(self, comments: list[dict]):
        """
        Check each inline comment separately against the GitHub API and discard of invalid comments,
        then publish all the remaining valid comments in a single review.
        For invalid comments, also try removing the suggestion part and posting the comment just on the first line.

        This is best-effort: publishing failures are logged and never raised to the caller.
        """
        verified_comments, invalid_comments = self._verify_code_comments(comments)

        # publish as a group the verified comments
        if verified_comments:
            try:
                self.pr.create_review(commit=self.last_commit_id, comments=verified_comments)
            except Exception as e:
                # Still best-effort, but leave a trace instead of silently dropping the failure.
                # `except Exception` (not a bare except) lets KeyboardInterrupt / SystemExit propagate.
                get_logger().error(f"Failed to publish verified inline comments, error: {e}")

        # try to publish one by one the invalid comments as a one-line code comment
        if invalid_comments and get_settings().github.try_fix_invalid_inline_comments:
            fixed_comments_as_one_liner = self._try_fix_invalid_inline_comments(
                [comment for comment, _ in invalid_comments])
            for comment in fixed_comments_as_one_liner:
                try:
                    # Fallback is disabled here to avoid re-entering this method recursively
                    self.publish_inline_comments([comment], disable_fallback=True)
                    get_logger().info(f"Published invalid comment as a single line comment: {comment}")
                except Exception:
                    get_logger().error(f"Failed to publish invalid comment as a single line comment: {comment}")

    def _verify_code_comment(self, comment: dict):
        is_verified = False
        e = None
        try:
            # event ="" # By leaving this blank, you set the review action state to PENDING
            input = dict(commit_id=self.last_commit_id.sha, comments=[comment])
            headers, data = self.pr._requester.requestJsonAndCheck(
                "POST", f"{self.pr.url}/reviews", input=input)
            pending_review_id = data["id"]
            is_verified = True
        except Exception as err:
            is_verified = False
            pending_review_id = None
            e = err
        if pending_review_id is not None:
            try:
                self.pr._requester.requestJsonAndCheck("DELETE", f"{self.pr.url}/reviews/{pending_review_id}")
            except Exception:
                pass
        return is_verified, e

    def _verify_code_comments(self, comments: list[dict]) -> tuple[list[dict], list[tuple[dict, Exception]]]:
        """Very each comment against the GitHub API and return 2 lists: 1 of verified and 1 of invalid comments"""
        verified_comments = []
        invalid_comments = []
        for comment in comments:
            time.sleep(1)  # for avoiding secondary rate limit
            is_verified, e = self._verify_code_comment(comment)
            if is_verified:
                verified_comments.append(comment)
            else:
                invalid_comments.append((comment, e))
        return verified_comments, invalid_comments

    def _try_fix_invalid_inline_comments(self, invalid_comments: list[dict]) -> list[dict]:
        """
        Try fixing invalid comments by removing the suggestion part and setting the comment just on the first line.
        Return only comments that have been modified in some way.
        This is a best-effort attempt to fix invalid comments, and should be verified accordingly.
        """
        import copy
        fixed_comments = []
        for comment in invalid_comments:
            try:
                fixed_comment = copy.deepcopy(comment)  # avoid modifying the original comment dict for later logging
                if "```suggestion" in comment["body"]:
                    fixed_comment["body"] = comment["body"].split("```suggestion")[0]
                if "start_line" in comment:
                    fixed_comment["line"] = comment["start_line"]
                    del fixed_comment["start_line"]
                if "start_side" in comment:
                    fixed_comment["side"] = comment["start_side"]
                    del fixed_comment["start_side"]
                if fixed_comment != comment:
                    fixed_comments.append(fixed_comment)
            except Exception as e:
                get_logger().error(f"Failed to fix inline comment, error: {e}")
        return fixed_comments

    def publish_code_suggestions(self, code_suggestions: list) -> bool:
        """
        Publishes code suggestions as comments on the PR.
        """
        post_parameters_list = []

        code_suggestions_validated = self.validate_comments_inside_hunks(code_suggestions)

        for suggestion in code_suggestions_validated:
            body = suggestion['body']
            relevant_file = suggestion['relevant_file']
            relevant_lines_start = suggestion['relevant_lines_start']
            relevant_lines_end = suggestion['relevant_lines_end']

            if not relevant_lines_start or relevant_lines_start == -1:
                get_logger().exception(
                    f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}")
                continue

            if relevant_lines_end < relevant_lines_start:
                get_logger().exception(f"Failed to publish code suggestion, "
                                  f"relevant_lines_end is {relevant_lines_end} and "
                                  f"relevant_lines_start is {relevant_lines_start}")
                continue

            if relevant_lines_end > relevant_lines_start:
                post_parameters = {
                    "body": body,
                    "path": relevant_file,
                    "line": relevant_lines_end,
                    "start_line": relevant_lines_start,
                    "start_side": "RIGHT",
                }
            else:  # API is different for single line comments
                post_parameters = {
                    "body": body,
                    "path": relevant_file,
                    "line": relevant_lines_start,
                    "side": "RIGHT",
                }
            post_parameters_list.append(post_parameters)

        try:
            self.publish_inline_comments(post_parameters_list)
            return True
        except Exception as e:
            get_logger().error(f"Failed to publish code suggestion, error: {e}")
            return False

    def edit_comment(self, comment, body: str):
        try:
            body = self.limit_output_characters(body, self.max_comment_chars)
            comment.edit(body=body)
        except GithubException as e:
            if hasattr(e, "status") and e.status == 403:
                # Log as warning for permission-related issues (usually due to polling)
                get_logger().warning(
                    "Failed to edit github comment due to permission restrictions",
                    artifact={"error": e})
            else:
                get_logger().exception(f"Failed to edit github comment", artifact={"error": e})

    def edit_comment_from_comment_id(self, comment_id: int, body: str):
        try:
            # self.pr.get_issue_comment(comment_id).edit(body)
            body = self.limit_output_characters(body, self.max_comment_chars)
            headers, data_patch = self.pr._requester.requestJsonAndCheck(
                "PATCH", f"{self.base_url}/repos/{self.repo}/issues/comments/{comment_id}",
                input={"body": body}
            )
        except Exception as e:
            get_logger().exception(f"Failed to edit comment, error: {e}")

    def reply_to_comment_from_comment_id(self, comment_id: int, body: str):
        try:
            # self.pr.get_issue_comment(comment_id).edit(body)
            body = self.limit_output_characters(body, self.max_comment_chars)
            headers, data_patch = self.pr._requester.requestJsonAndCheck(
                "POST", f"{self.base_url}/repos/{self.repo}/pulls/{self.pr_num}/comments/{comment_id}/replies",
                input={"body": body}
            )
        except Exception as e:
            get_logger().exception(f"Failed to reply comment, error: {e}")

    def get_comment_body_from_comment_id(self, comment_id: int):
        try:
            # self.pr.get_issue_comment(comment_id).edit(body)
            headers, data_patch = self.pr._requester.requestJsonAndCheck(
                "GET", f"{self.base_url}/repos/{self.repo}/issues/comments/{comment_id}"
            )
            return data_patch.get("body","")
        except Exception as e:
            get_logger().exception(f"Failed to edit comment, error: {e}")
            return None

    def publish_file_comments(self, file_comments: list) -> bool:
        """
        Publish file-level comments on the PR, updating our own existing comment for a path when one exists.

        For each entry of `file_comments` (dicts with at least 'body' and 'path'; 'commit_id' is set here),
        the existing PR review comments are scanned for a file-level comment on the same path that was
        created by us. A match is updated in place (PATCH); otherwise a new comment is created (POST).

        "Created by us" means: in 'app' deployment, the configured GITHUB.APP_NAME appears
        (case-insensitively) in the comment author's login; in 'user' deployment, the author's
        login equals `self.github_user_id`.

        Returns:
            True on success, False when any step raised (the error is logged).
        """
        try:
            headers, existing_comments = self.pr._requester.requestJsonAndCheck(
                "GET", f"{self.pr.url}/comments"
            )
            # Loop-invariant, so looked up once
            our_app_name = get_settings().get("GITHUB.APP_NAME", "")
            for comment in file_comments:
                comment['commit_id'] = self.last_commit_id.sha
                comment['body'] = self.limit_output_characters(comment['body'], self.max_comment_chars)

                found = False
                for existing_comment in existing_comments:
                    same_comment_creator = False
                    if self.deployment_type == 'app':
                        # An empty app name is a substring of every login; without this guard it would
                        # match (and overwrite) comments written by anyone.
                        same_comment_creator = bool(our_app_name) and \
                            our_app_name.lower() in existing_comment['user']['login'].lower()
                    elif self.deployment_type == 'user':
                        same_comment_creator = self.github_user_id == existing_comment['user']['login']
                    if existing_comment['subject_type'] == 'file' and comment['path'] == existing_comment['path'] and same_comment_creator:

                        headers, data_patch = self.pr._requester.requestJsonAndCheck(
                            "PATCH", f"{self.base_url}/repos/{self.repo}/pulls/comments/{existing_comment['id']}", input={"body":comment['body']}
                        )
                        found = True
                        break
                if not found:
                    headers, data_post = self.pr._requester.requestJsonAndCheck(
                        "POST", f"{self.pr.url}/comments", input=comment
                    )
            return True
        except Exception as e:
            get_logger().error(f"Failed to publish diffview file summary, error: {e}")
            return False

    def remove_initial_comment(self):
        """Remove every comment in the PR's `comments_list` that is flagged `is_temporary`.

        A PR object without a `comments_list` attribute is treated as having no comments.
        Errors are logged and not propagated.
        """
        try:
            tracked = getattr(self.pr, 'comments_list', [])
            for candidate in tracked:
                if not candidate.is_temporary:
                    continue
                self.remove_comment(candidate)
        except Exception as e:
            get_logger().exception(f"Failed to remove initial comment, error: {e}")

    def remove_comment(self, comment):
        """Delete `comment` by calling its `delete()`; a failure is logged rather than raised."""
        try:
            comment.delete()
        except Exception as err:
            get_logger().exception(f"Failed to remove comment, error: {err}")

    def get_title(self):
        """Return the title of the pull request held in `self.pr`."""
        pr_title = self.pr.title
        return pr_title

    def get_languages(self):
        """Return whatever `get_languages()` yields on the repository object from `_get_repo()`."""
        return self._get_repo().get_languages()

    def get_pr_branch(self):
        """Return the ref name of the pull request's head."""
        head = self.pr.head
        return head.ref

    def get_pr_owner_id(self) -> str | None:
        if not self.repo:
            return None
        return self.repo.split('/')[0]

    def get_pr_description_full(self):
        """Return the pull request body as stored on `self.pr`."""
        description = self.pr.body
        return description

    def get_user_id(self):
        """Return the cached GitHub login, resolving it through the client on first use.

        If the lookup fails for any reason the cached value becomes the empty string.
        """
        if self.github_user_id:
            return self.github_user_id
        try:
            account = self.github_client.get_user()
            self.github_user_id = account.raw_data['login']
        except Exception:
            # best effort: an unknown user is represented by ""
            self.github_user_id = ""
        return self.github_user_id

    def get_notifications(self, since: datetime):
        """Return the authenticated user's notifications newer than `since`.

        Raises:
            ValueError: if the GITHUB.DEPLOYMENT_TYPE setting is anything other than 'user'.
        """
        mode = get_settings().get("GITHUB.DEPLOYMENT_TYPE", "user")
        if mode == 'user':
            current_user = self.github_client.get_user()
            return current_user.get_notifications(since=since)
        raise ValueError("Deployment mode must be set to 'user' to get notifications")

    def get_issue_comments(self):
        """Return the result of `get_issue_comments()` on the pull request object."""
        pull_request = self.pr
        return pull_request.get_issue_comments()

    def get_repo_settings(self):
        """Return the decoded content of '.pr_agent.toml' fetched without an explicit ref.

        No `ref` is passed to `get_contents`, so the PR head is deliberately not used.
        Returns "" if the file cannot be retrieved.
        """
        try:
            settings_file = self.repo_obj.get_contents(".pr_agent.toml")
            return settings_file.decoded_content
        except Exception:
            return ""

    def get_workspace_name(self):
        """Return the part of `self.repo` that precedes the first '/'."""
        workspace, _sep, _rest = self.repo.partition('/')
        return workspace

    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:
        """POST an 'eyes' reaction on an issue comment.

        Args:
            issue_comment_id: id of the issue comment to react to.
            disable_eyes: when true, nothing is sent.

        Returns:
            The 'id' field of the response, or None when disabled, absent, or on failure
            (failures are logged as warnings).
        """
        if not disable_eyes:
            try:
                _headers, reaction = self.pr._requester.requestJsonAndCheck(
                    "POST", f"{self.base_url}/repos/{self.repo}/issues/comments/{issue_comment_id}/reactions",
                    input={"content": "eyes"}
                )
                return reaction.get("id", None)
            except Exception as err:
                get_logger().warning(f"Failed to add eyes reaction, error: {err}")
        return None

    def remove_reaction(self, issue_comment_id: int, reaction_id: str) -> bool:
        """Issue a DELETE for one reaction on an issue comment.

        Returns:
            True if the request completed, False if it raised (the error is logged).
        """
        try:
            endpoint = f"{self.base_url}/repos/{self.repo}/issues/comments/{issue_comment_id}/reactions/{reaction_id}"
            _headers, _payload = self.pr._requester.requestJsonAndCheck("DELETE", endpoint)
        except Exception as err:
            get_logger().exception(f"Failed to remove eyes reaction, error: {err}")
            return False
        return True

    def _parse_pr_url(self, pr_url: str) -> Tuple[str, int]:
        parsed_url = urlparse(pr_url)

        if parsed_url.path.startswith('/api/v3'):
            parsed_url = urlparse(pr_url.replace("/api/v3", ""))

        path_parts = parsed_url.path.strip('/').split('/')
        if 'api.github.com' in parsed_url.netloc or '/api/v3' in pr_url:
            if len(path_parts) < 5 or path_parts[3] != 'pulls':
                raise ValueError("The provided URL does not appear to be a GitHub PR URL")
            repo_name = '/'.join(path_parts[1:3])
            try:
                pr_number = int(path_parts[4])
            except ValueError as e:
                raise ValueError("Unable to convert PR number to integer") from e
            return repo_name, pr_number

        if len(path_parts) < 4 or path_parts[2] != 'pull':
            raise ValueError("The provided URL does not appear to be a GitHub PR URL")

        repo_name = '/'.join(path_parts[:2])
        try:
            pr_number = int(path_parts[3])
        except ValueError as e:
            raise ValueError("Unable to convert PR number to integer") from e

        return repo_name, pr_number

    def _parse_issue_url(self, issue_url: str) -> Tuple[str, int]:
        parsed_url = urlparse(issue_url)

        if parsed_url.path.startswith('/api/v3'): #Check if came from github app
            parsed_url = urlparse(issue_url.replace("/api/v3", ""))

        path_parts = parsed_url.path.strip('/').split('/')
        if 'api.github.com' in parsed_url.netloc or '/api/v3' in issue_url: #Check if came from github app
            if len(path_parts) < 5 or path_parts[3] != 'issues':
                raise ValueError("The provided URL does not appear to be a GitHub ISSUE URL")
            repo_name = '/'.join(path_parts[1:3])
            try:
                issue_number = int(path_parts[4])
            except ValueError as e:
                raise ValueError("Unable to convert issue number to integer") from e
            return repo_name, issue_number

        if len(path_parts) < 4 or path_parts[2] != 'issues':
            raise ValueError("The provided URL does not appear to be a GitHub PR issue")

        repo_name = '/'.join(path_parts[:2])
        try:
            issue_number = int(path_parts[3])
        except ValueError as e:
            raise ValueError("Unable to convert issue number to integer") from e

        return repo_name, issue_number

    def _get_github_client(self):
        """Build a `Github` client for the configured deployment type.

        Reads GITHUB.DEPLOYMENT_TYPE (default 'user') and stores it on `self.deployment_type`.
        'app' authenticates with app id + private key + `self.installation_id`;
        'user' authenticates with the configured user token. The chosen auth object is
        kept on `self.auth`.

        Raises:
            ValueError: when required credentials are missing or the deployment type
                is neither 'app' nor 'user'.
        """
        self.deployment_type = get_settings().get("GITHUB.DEPLOYMENT_TYPE", "user")
        self.auth = None

        if self.deployment_type == 'app':
            try:
                private_key = get_settings().github.private_key
                app_id = get_settings().github.app_id
            except AttributeError as e:
                raise ValueError("GitHub app ID and private key are required when using GitHub app deployment") from e
            if not self.installation_id:
                raise ValueError("GitHub app installation ID is required when using GitHub app deployment")
            self.auth = AppAuthentication(app_id=app_id, private_key=private_key,
                                          installation_id=self.installation_id)
        elif self.deployment_type == 'user':
            try:
                token = get_settings().github.user_token
            except AttributeError as e:
                raise ValueError(
                    "GitHub token is required when using user deployment. See: "
                    "https://github.com/Codium-ai/pr-agent#method-2-run-from-source") from e
            self.auth = Auth.Token(token)

        if not self.auth:
            raise ValueError("Could not authenticate to GitHub")
        return Github(auth=self.auth, base_url=self.base_url)

    def _get_repo(self):
        if hasattr(self, 'repo_obj') and \
                hasattr(self.repo_obj, 'full_name') and \
                self.repo_obj.full_name == self.repo:
            return self.repo_obj
        else:
            self.repo_obj = self.github_client.get_repo(self.repo)
            return self.repo_obj


    def _get_pr(self):
        return self._get_repo().get_pull(self.pr_num)

    def get_pr_file_content(self, file_path: str, branch: str) -> str:
        """Return the decoded text of `file_path` at ref `branch`, or "" if it cannot be read."""
        try:
            blob = self._get_repo().get_contents(file_path, ref=branch)
            return str(blob.decoded_content.decode())
        except Exception:
            # any failure (missing file, decode error, API error) maps to empty content
            return ""

    def create_or_update_pr_file(
        self, file_path: str, branch: str, contents="", message=""
    ) -> None:
        """Write `contents` to `file_path` on `branch` through the repository's `update_file`.

        The current blob sha is looked up first; if that lookup fails for any reason an
        empty sha is used instead.
        """
        try:
            existing_sha = self._get_repo().get_contents(file_path, ref=branch).sha
        except Exception:
            # NOTE(review): an empty sha is passed to update_file when the file could not be
            # read - confirm the API accepts this for creating a new file.
            existing_sha = ""

        self.repo_obj.update_file(
            path=file_path,
            message=message,
            content=contents,
            sha=existing_sha,
            branch=branch,
        )

    def _get_pr_file_content(self, file: FilePatchInfo, sha: str) -> str:
        """Return the content of `file.filename` at `sha` via `get_pr_file_content`."""
        target_path = file.filename
        return self.get_pr_file_content(target_path, sha)

    def publish_labels(self, pr_types):
        """PUT the given label names onto the PR's issue, each with a color.

        Known label names get their mapped color; any other name gets the "Other" color.
        Failures are logged as warnings and not raised.
        """
        try:
            fallback_color = "d1bcf9"  # default to "Other" color
            palette = {
                "Bug fix": "1d76db",
                "Tests": "e99695",
                "Bug fix with tests": "c5def5",
                "Enhancement": "bfd4f2",
                "Documentation": "d4c5f9",
                "Other": "d1bcf9",
            }
            payload = [
                {"name": label, "color": palette.get(label, fallback_color)}
                for label in pr_types
            ]
            _headers, _data = self.pr._requester.requestJsonAndCheck(
                "PUT", f"{self.pr.issue_url}/labels", input=payload
            )
        except Exception as err:
            get_logger().warning(f"Failed to publish labels, error: {err}")

    def get_pr_labels(self, update=False):
        """Return the PR's label names.

        Args:
            update: when true, re-query the labels endpoint instead of using the labels
                already present on `self.pr` (they may have changed in the meantime).

        Returns:
            list of label names; an empty list if anything fails (the error is logged).
        """
        try:
            if update:
                _headers, fresh_labels = self.pr._requester.requestJsonAndCheck(
                    "GET", f"{self.pr.issue_url}/labels")
                return [entry['name'] for entry in fresh_labels]
            return [label.name for label in self.pr.labels]
        except Exception as e:
            get_logger().exception(f"Failed to get labels, error: {e}")
            return []

    def get_repo_labels(self):
        """Return at most the first 50 labels yielded by the repository's `get_labels()`."""
        return list(itertools.islice(self.repo_obj.get_labels(), 50))

    def get_commit_messages(self):
        """
        Build a numbered, newline-separated listing of the PR's commit messages.

        Returns:
            str: lines of the form "<n>. <message>" (numbering starts at 1); "" if the
            commits cannot be read. When CONFIG.MAX_COMMITS_TOKENS is set, the text is
            passed through `clip_tokens` with that limit.
        """
        token_budget = get_settings().get("CONFIG.MAX_COMMITS_TOKENS", None)
        try:
            messages = [entry.commit.message for entry in self.pr.get_commits()]
            numbered = [f"{index}. {text}" for index, text in enumerate(messages, start=1)]
            commit_messages_str = "\n".join(numbered)
        except Exception:
            commit_messages_str = ""

        if token_budget:
            commit_messages_str = clip_tokens(commit_messages_str, token_budget)
        return commit_messages_str

    def generate_link_to_relevant_line_number(self, suggestion) -> str:
        """Return a link into the PR's "files" diff view for the line named by `suggestion`.

        Reads `suggestion['relevant_file']` and `suggestion['relevant_line']`, resolves the
        line through `find_line_number_of_relevant_line_in_file`, and anchors the link on
        the right-hand side of the file's diff. Returns "" when the line text is empty,
        the line is not found, or any error occurs (errors are logged at info level).
        """
        try:
            file_path = suggestion['relevant_file'].strip('`').strip("'").strip('\n')
            line_text = suggestion['relevant_line'].strip('\n')
            if line_text:
                position, absolute_position = find_line_number_of_relevant_line_in_file \
                    (self.diff_files, file_path, line_text)

                if absolute_position != -1:
                    # link to diff: the anchor is the sha256 of the file path plus 'R<line>'
                    path_digest = hashlib.sha256(file_path.encode('utf-8')).hexdigest()
                    return f"{self.base_url_html}/{self.repo}/pull/{self.pr_num}/files#diff-{path_digest}R{absolute_position}"
        except Exception as e:
            get_logger().info(f"Failed adding line link, error: {e}")

        return ""

    def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str:
        """Return a link into the PR's "files" diff view for a file, line, or line range.

        Args:
            relevant_file: file path; its sha256 hex digest forms the diff anchor.
            relevant_line_start: first line, or -1 to link to the file's diff as a whole.
            relevant_line_end: optional last line; when truthy a range anchor is produced.
        """
        path_digest = hashlib.sha256(relevant_file.encode('utf-8')).hexdigest()
        file_anchor = f"{self.base_url_html}/{self.repo}/pull/{self.pr_num}/files#diff-{path_digest}"

        if relevant_line_start == -1:
            return file_anchor
        if relevant_line_end:
            return f"{file_anchor}R{relevant_line_start}-R{relevant_line_end}"
        return f"{file_anchor}R{relevant_line_start}"

    def get_lines_link_original_file(self, filepath: str, component_range: Range) -> str:
        """
        Build a blob link (pinned to `self.last_commit_id.sha`) for a line range of a file.

        Both ends of `component_range` are shifted by one before being placed in the anchor.

        Args:
            filepath (str): The path of the file.
            component_range (Range): Object exposing `line_start` and `line_end`.

        Returns:
            str: A link of the form
            "{base_url_html}/{repo}/blob/{commit_sha}/{filepath}/#L{start+1}-L{end+1}".

        Example:
            With line_start=10 and line_end=20 the anchor is "#L11-L21".
        """
        first_line = component_range.line_start + 1
        last_line = component_range.line_end + 1
        return (f"{self.base_url_html}/{self.repo}/blob/{self.last_commit_id.sha}/{filepath}/"
                f"#L{first_line}-L{last_line}")

    def get_pr_id(self):
        """
        Return an identifier of the form "<repo>/<pr_num>" for this pull request.

        Returns:
            str: the identifier, or "" if `self.repo` / `self.pr_num` cannot be read.
        """
        try:
            return f"{self.repo}/{self.pr_num}"
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are not swallowed
            return ""

    def fetch_sub_issues(self, issue_url):
        """
        Fetch sub-issues linked to the given GitHub issue URL using GraphQL via PyGitHub.

        Two GraphQL calls are made: the first resolves the issue's node id from
        owner/repo/number, the second lists the first 10 `subIssues` of that node.
        Values taken from the URL are sent as GraphQL variables rather than being
        interpolated into the query text.

        Args:
            issue_url (str): issue URL whose last four path segments are
                '<owner>/<repo>/issues/<number>'.

        Returns:
            set: URLs of the sub-issues found; an empty set on any failure (failures are logged).
        """
        sub_issues = set()

        try:
            # Extract owner, repo, and issue number from URL
            parts = issue_url.rstrip("/").split("/")
            if len(parts) < 4 or not parts[-1].isdigit():
                get_logger().warning(f"Could not parse owner/repo/issue number from {issue_url}")
                return sub_issues
            owner, repo, issue_number = parts[-4], parts[-3], int(parts[-1])

            # Gets Issue ID from Issue Number
            query = """
            query($owner: String!, $name: String!, $number: Int!) {
                repository(owner: $owner, name: $name) {
                    issue(number: $number) {
                        id
                    }
                }
            }
            """
            response_tuple = self.github_client._Github__requester.requestJson(
                "POST", "/graphql",
                input={"query": query,
                       "variables": {"owner": owner, "name": repo, "number": issue_number}})

            # Extract the JSON response from the tuple and parses it
            if isinstance(response_tuple, tuple) and len(response_tuple) == 3:
                response_json = json.loads(response_tuple[2])
            else:
                get_logger().error(f"Unexpected response format: {response_tuple}")
                return sub_issues

            # Each level may be JSON null (e.g. repository or issue not found), so fall back to {}
            repository_data = (response_json.get("data") or {}).get("repository") or {}
            issue_id = (repository_data.get("issue") or {}).get("id")

            if not issue_id:
                get_logger().warning(f"Issue ID not found for {issue_url}")
                return sub_issues

            # Fetch Sub-Issues
            sub_issues_query = """
            query($id: ID!) {
                node(id: $id) {
                    ... on Issue {
                        subIssues(first: 10) {
                            nodes {
                                url
                            }
                        }
                    }
                }
            }
            """
            sub_issues_response_tuple = self.github_client._Github__requester.requestJson(
                "POST", "/graphql",
                input={"query": sub_issues_query, "variables": {"id": issue_id}})

            # Extract the JSON response from the tuple and parses it
            if isinstance(sub_issues_response_tuple, tuple) and len(sub_issues_response_tuple) == 3:
                sub_issues_response_json = json.loads(sub_issues_response_tuple[2])
            else:
                get_logger().error("Unexpected sub-issues response format", artifact={"response": sub_issues_response_tuple})
                return sub_issues

            node_data = (sub_issues_response_json.get("data") or {}).get("node") or {}
            sub_issues_data = node_data.get("subIssues")
            if not sub_issues_data:
                get_logger().error("Invalid sub-issues response structure")
                return sub_issues

            nodes = sub_issues_data.get("nodes") or []
            get_logger().info(f"Github Sub-issues fetched: {len(nodes)}", artifact={"nodes": nodes})

            for sub_issue in nodes:
                # skip null entries as well as entries without a url
                if sub_issue and "url" in sub_issue:
                    sub_issues.add(sub_issue["url"])

        except Exception as e:
            get_logger().exception(f"Failed to fetch sub-issues. Error: {e}")

        return sub_issues

    def auto_approve(self) -> bool:
        """Submit an APPROVE review on the PR.

        Returns:
            True only if the created review reports state "APPROVED"; False otherwise,
            including when the request raises (the error is logged).
        """
        try:
            review = self.pr.create_review(event="APPROVE")
            return bool(review.state == "APPROVED")
        except Exception as e:
            get_logger().exception(f"Failed to auto-approve, error: {e}")
            return False

    def calc_pr_statistics(self, pull_request_data: dict):
        """Return an empty dict; `pull_request_data` is not used by this implementation."""
        return dict()

    def validate_comments_inside_hunks(self, code_suggestions):
        """
        validate that all committable comments are inside PR hunks - this is a must for committable comments in GitHub

        Works on a deep copy of `code_suggestions` and returns that copy; the input is not modified.
        For each suggestion whose line range is not fully contained in a hunk of its file:
          - if the range overhangs a hunk by fewer than 10 lines, the range is clamped to that hunk
            and the ```suggestion``` block in the body is replaced by a collapsible diff
            (making the comment non-committable);
          - otherwise an error is logged and the suggestion is left unchanged.

        Args:
            code_suggestions: list of suggestion dicts. Keys read here: 'relevant_file',
                'relevant_lines_start', 'relevant_lines_end', 'original_suggestion'
                (with 'existing_code' and 'improved_code') and 'body'.

        Returns:
            The deep-copied list of suggestions, possibly adjusted as described above.
        """
        code_suggestions_copy = copy.deepcopy(code_suggestions)
        diff_files = self.get_diff_files()
        RE_HUNK_HEADER = re.compile(
            r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")

        diff_files = set_file_languages(diff_files)

        for suggestion in code_suggestions_copy:
            try:
                relevant_file_path = suggestion['relevant_file']
                for file in diff_files:
                    if file.filename == relevant_file_path:

                        # generate on-demand the patches range for the relevant file
                        # (the result is cached on the file object as `patches_range`)
                        patch_str = file.patch
                        if not hasattr(file, 'patches_range'):
                            file.patches_range = []
                            patch_lines = patch_str.splitlines()
                            for i, line in enumerate(patch_lines):
                                if line.startswith('@@'):
                                    match = RE_HUNK_HEADER.match(line)
                                    # identify hunk header
                                    if match:
                                        # presumably start2/size2 describe the new-file side of the hunk
                                        # - confirm against extract_hunk_headers
                                        section_header, size1, size2, start1, start2 = extract_hunk_headers(match)
                                        file.patches_range.append({'start': start2, 'end': start2 + size2 - 1})

                        patches_range = file.patches_range
                        comment_start_line = suggestion.get('relevant_lines_start', None)
                        comment_end_line = suggestion.get('relevant_lines_end', None)
                        original_suggestion = suggestion.get('original_suggestion', None) # needed for diff code
                        # nothing to validate without a line range and the original suggestion;
                        # this `continue` moves on to the next file of the inner loop
                        if not comment_start_line or not comment_end_line or not original_suggestion:
                            continue

                        # check if the comment is inside a valid hunk
                        is_valid_hunk = False
                        min_distance = float('inf')
                        patch_range_min = None
                        # find the hunk that contains the comment, or the closest one
                        for i, patch_range in enumerate(patches_range):
                            # d1: how far the comment starts after the hunk start (negative = starts before it)
                            # d2: how far the comment ends before the hunk end (negative = ends after it)
                            d1 = comment_start_line - patch_range['start']
                            d2 = patch_range['end'] - comment_end_line
                            if d1 >= 0 and d2 >= 0:  # found a valid hunk
                                is_valid_hunk = True
                                min_distance = 0
                                patch_range_min = patch_range
                                break
                            elif d1 * d2 <= 0:  # comment is possibly inside the hunk
                                # the overhang on the violating side is the distance to this hunk
                                d1_clip = abs(min(0, d1))
                                d2_clip = abs(min(0, d2))
                                d = max(d1_clip, d2_clip)
                                if d < min_distance:
                                    patch_range_min = patch_range
                                    min_distance = min(min_distance, d)
                        if not is_valid_hunk:
                            if min_distance < 10:  # 10 lines - a reasonable distance to consider the comment inside the hunk
                                # make the suggestion non-committable, yet multi line
                                # (clamp the line range so it lies within the closest hunk)
                                suggestion['relevant_lines_start'] = max(suggestion['relevant_lines_start'], patch_range_min['start'])
                                suggestion['relevant_lines_end'] = min(suggestion['relevant_lines_end'], patch_range_min['end'])
                                body = suggestion['body'].strip()

                                # present new diff code in collapsible
                                existing_code = original_suggestion['existing_code'].rstrip() + "\n"
                                improved_code = original_suggestion['improved_code'].rstrip() + "\n"
                                # n=999 context lines, so unchanged lines are kept in the diff
                                diff = difflib.unified_diff(existing_code.split('\n'),
                                                            improved_code.split('\n'), n=999)
                                patch_orig = "\n".join(diff)
                                # skip the first 5 lines of the joined diff output (its leading header lines)
                                patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n')
                                diff_code = f"\n\n<details><summary>New proposed code:</summary>\n\n```diff\n{patch.rstrip()}\n```"
                                # replace ```suggestion ... ``` with diff_code, using regex:
                                body = re.sub(r'```suggestion.*?```', diff_code, body, flags=re.DOTALL)
                                body += "\n\n</details>"
                                suggestion['body'] = body
                                get_logger().info(f"Comment was moved to a valid hunk, "
                                                  f"start_line={suggestion['relevant_lines_start']}, end_line={suggestion['relevant_lines_end']}, file={file.filename}")
                            else:
                                get_logger().error(f"Comment is not inside a valid hunk, "
                                                   f"start_line={suggestion['relevant_lines_start']}, end_line={suggestion['relevant_lines_end']}, file={file.filename}")
            except Exception as e:
                # a failure on one suggestion is logged and does not stop the remaining ones
                get_logger().error(f"Failed to process patch for committable comment, error: {e}")
        return code_suggestions_copy

    #Clone related
    def _prepare_clone_url_with_token(self, repo_url_to_clone: str) -> str | None:
        scheme = "https://"

        #For example, to clone:
        #https://github.com/Codium-ai/pr-agent-pro.git
        #Need to embed inside the github token:
        #https://<token>@github.com/Codium-ai/pr-agent-pro.git

        github_token = self.auth.token
        github_base_url = self.base_url_html
        if not all([github_token, github_base_url]):
            get_logger().error("Either missing auth token or missing base url")
            return None
        if scheme not in github_base_url:
            get_logger().error(f"Base url: {github_base_url} is missing prefix: {scheme}")
            return None
        github_com = github_base_url.split(scheme)[1]  # e.g. 'github.com' or github.<org>.com
        if not github_com:
            get_logger().error(f"Base url: {github_base_url} has an empty base url")
            return None
        if github_com not in repo_url_to_clone:
            get_logger().error(f"url to clone: {repo_url_to_clone} does not contain {github_com}")
            return None
        repo_full_name = repo_url_to_clone.split(github_com)[-1]
        if not repo_full_name:
            get_logger().error(f"url to clone: {repo_url_to_clone} is malformed")
            return None

        clone_url = scheme
        if self.deployment_type == 'app':
            clone_url += "git:"
        clone_url += f"{github_token}@{github_com}{repo_full_name}"
        return clone_url


================================================
FILE: pr_agent/git_providers/gitlab_provider.py
================================================
import difflib
import hashlib
import re
import urllib.parse
from typing import Any, Optional, Tuple, Union
from urllib.parse import parse_qs, urlparse

import gitlab
import requests
from gitlab import (GitlabAuthenticationError, GitlabCreateError,
                    GitlabGetError, GitlabUpdateError)

from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo

from ..algo.file_filter import filter_ignored
from ..algo.git_patch_processing import decode_if_bytes
from ..algo.language_handler import is_valid_file
from ..algo.utils import (clip_tokens,
                          find_line_number_of_relevant_line_in_file,
                          load_large_diff)
from ..config_loader import get_settings
from ..log import get_logger
from .git_provider import MAX_FILES_ALLOWED_FULL, GitProvider


class DiffNotFoundError(Exception):
    """Signals that no diff could be located for a merge request."""

class GitLabProvider(GitProvider):

    def __init__(self, merge_request_url: Optional[str] = None, incremental: Optional[bool] = False):
        """Create the python-gitlab client from settings and bind to `merge_request_url`.

        Settings read: GITLAB.URL, GITLAB.SSL_VERIFY (default True),
        GITLAB.PERSONAL_ACCESS_TOKEN and GITLAB.AUTH_TYPE (default "oauth_token").

        Raises:
            ValueError: if the URL or token is unset, the auth type is not one of
                'oauth_token' / 'private_token', or the client cannot be constructed.
        """
        gitlab_url = get_settings().get("GITLAB.URL", None)
        if not gitlab_url:
            raise ValueError("GitLab URL is not set in the config file")
        self.gitlab_url = gitlab_url

        ssl_verify = get_settings().get("GITLAB.SSL_VERIFY", True)
        gitlab_access_token = get_settings().get("GITLAB.PERSONAL_ACCESS_TOKEN", None)
        if not gitlab_access_token:
            raise ValueError("GitLab personal access token is not set in the config file")

        # Authentication method selection via configuration
        auth_method = get_settings().get("GITLAB.AUTH_TYPE", "oauth_token")
        if auth_method not in ("oauth_token", "private_token"):
            raise ValueError(f"Unsupported GITLAB.AUTH_TYPE: '{auth_method}'. "
                           f"Must be 'oauth_token' or 'private_token'.")

        # The validated auth method name is also the keyword under which the token is passed
        try:
            self.gl = gitlab.Gitlab(
                url=gitlab_url,
                ssl_verify=ssl_verify,
                **{auth_method: gitlab_access_token}
            )
        except Exception as e:
            get_logger().error(f"Failed to create GitLab instance: {e}")
            raise ValueError(f"Unable to authenticate with GitLab: {e}")

        self.max_comment_chars = 65000
        self.id_project = None
        self.id_mr = None
        self.mr = None
        self.diff_files = None
        self.git_files = None
        self.temp_comments = []
        # keyed by (project path, old sha, new sha) -> list of diffs
        self._submodule_cache: dict[tuple[str, str, str], list[dict]] = {}
        self.pr_url = merge_request_url
        self._set_merge_request(merge_request_url)
        self.RE_HUNK_HEADER = re.compile(
            r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
        self.incremental = incremental

    # --- submodule expansion helpers (opt-in) ---
    def _get_gitmodules_map(self) -> dict[str, str]:
        """
        Return {submodule_path -> repo_url} from '.gitmodules' (best effort).
        Tries target branch first, then source branch. Always returns text.
        """
        try:
            proj = self.gl.projects.get(self.id_project)
        except Exception:
            return {}

        import base64

        def _read_text(ref: str | None) -> str | None:
            if not ref:
                return None
            try:
                f = proj.files.get(file_path=".gitmodules", ref=ref)
            except Exception:
                return None

            # 1) python-gitlab File.decode() – usually returns BYTES
            try:
                raw = f.decode()
                if isinstance(raw, (bytes, bytearray)):
                    return raw.decode("utf-8", "ignore")
                if isinstance(raw, str):
                    return raw
            except Exception:
                pass

            # 2) fallback: base64 decode f.content
            try:
                c = getattr(f, "content", None)
                if c:
                    return base64.b64decode(c).decode("utf-8", "ignore")
            except Exception:
                pass

            return None

        content = (
            _read_text(getattr(self.mr, "target_branch", None))
            or _read_text(getattr(self.mr, "source_branch", None))
        )
        if not content:
            return {}

        import configparser

        parser = configparser.ConfigParser(
            delimiters=("=",),
            interpolation=None,
            inline_comment_prefixes=("#", ";"),
            strict=False,
        )
        try:
            parser.read_string(content)
        except Exception:
            return {}

        out: dict[str, str] = {}
        for section in parser.sections():
            if not section.lower().startswith("submodule"):
                continue
            path = parser.get(section, "path", fallback=None)
            url = parser.get(section, "url", fallback=None)
            if path and url:
                path = path.strip().strip('"').strip("'")
                url = url.strip().strip('"').strip("'")
                out[path] = url
        return out

    def _url_to_project_path(self, url: str) -> str | None:
        """
        Convert ssh/https GitLab URL to 'group/subgroup/repo' project path.
        """
        try:
            if url.startswith("git@") and ":" in url:
                path = url.split(":", 1)[1]
            else:
                path = urllib.parse.urlparse(url).path.lstrip("/")
            if path.endswith(".git"):
                path = path[:-4]
            return path or None
        except Exception:
            return None

    def _project_by_path(self, proj_path: str):
        """
        Resolve a project by path with multiple strategies:
        1) URL-encoded path_with_namespace
        2) Raw path_with_namespace
        3) Search fallback + exact match on path_with_namespace (case-insensitive)
        Returns a project object or None.
        """
        if not proj_path:
            return None

        # 1) Encoded
        try:
            enc = urllib.parse.quote_plus(proj_path)
            return self.gl.projects.get(enc)
        except Exception:
            pass

        # 2) Raw
        try:
            return self.gl.projects.get(proj_path)
        except Exception:
            pass

        # 3) Search fallback
        try:
            name = proj_path.split("/")[-1]
            # membership=True so we don't leak other people's repos
            matches = self.gl.projects.list(search=name, simple=True, membership=True, per_page=100)
            # prefer exact path_with_namespace match (case-insensitive)
            for p in matches:
                pwn = getattr(p, "path_with_namespace", "")
                if pwn.lower() == proj_path.lower():
                    return self.gl.projects.get(p.id)
            if matches:
                get_logger().warning(f"[submodule] no exact match for {proj_path} (skip)")
        except Exception:
            pass

        return None

    def _compare_submodule(self, proj_path: str, old_sha: str, new_sha: str) -> list[dict]:
        """
        Call repository_compare on submodule project; return list of diffs.
        """
        key = (proj_path, old_sha, new_sha)
        if key in self._submodule_cache:
            return self._submodule_cache[key]
        try:
            proj = self._project_by_path(proj_path)
            if proj is None:
                get_logger().warning(f"[submodule] resolve failed for {proj_path}")
                self._submodule_cache[key] = []
                return []
            cmp = proj.repository_compare(old_sha, new_sha)
            if isinstance(cmp, dict):
                diffs = cmp.get("diffs", []) or []
            else:
                diffs = []
            self._submodule_cache[key] = diffs
            return diffs
        except Exception as e:
            get_logger().warning(f"[submodule] compare failed for {proj_path} {old_sha}..{new_sha}: {e}")
            self._submodule_cache[key] = []
            return []

    def _expand_submodule_changes(self, changes: list[dict]) -> list[dict]:
        """
        Optionally append the real file diffs hidden behind submodule pointer bumps.

        When the GITLAB.EXPAND_SUBMODULE_DIFFS setting is truthy, every change whose
        patch contains a '-Subproject commit <sha>' / '+Subproject commit <sha>' pair
        is resolved through .gitmodules to a project, and the diffs between the two
        commits are appended with their paths prefixed by the submodule path.

        Returns:
            The input list itself when expansion is disabled, the setting cannot be
            read, or no .gitmodules mapping is available; otherwise a new list with
            the original entries followed by the expanded submodule entries.
        """
        try:
            enabled = bool(get_settings().get("GITLAB.EXPAND_SUBMODULE_DIFFS", False))
        except Exception:
            enabled = False
        if not enabled:
            return changes

        module_urls = self._get_gitmodules_map()
        if not module_urls:
            return changes

        expanded = list(changes)
        for change in changes:
            hunk = change.get("diff") or ""
            if "Subproject commit" not in hunk:
                continue

            # Both the removed and the added pointer line are needed to compare
            removed = re.search(r"^-Subproject commit ([0-9a-f]{7,40})", hunk, re.M)
            added = re.search(r"^\+Subproject commit ([0-9a-f]{7,40})", hunk, re.M)
            if removed is None or added is None:
                continue
            old_sha = removed.group(1)
            new_sha = added.group(1)

            sub_path = change.get("new_path") or change.get("old_path") or ""
            repo_url = module_urls.get(sub_path)
            if not repo_url:
                get_logger().warning(f"[submodule] no url for '{sub_path}' in .gitmodules (skip)")
                continue

            proj_path = self._url_to_project_path(repo_url)
            if not proj_path:
                get_logger().warning(f"[submodule] cannot parse project path from url '{repo_url}' (skip)")
                continue

            get_logger().info(f"[submodule] {sub_path} url={repo_url} -> proj_path={proj_path}")
            for sub_diff in self._compare_submodule(proj_path, old_sha, new_sha):
                inner_old = sub_diff.get("old_path") or sub_diff.get("a_path") or ""
                inner_new = sub_diff.get("new_path") or sub_diff.get("b_path") or inner_old
                # Paths inside the submodule are re-rooted under the submodule path
                expanded.append({
                    "old_path": f"{sub_path}/{inner_old}" if inner_old else sub_path,
                    "new_path": f"{sub_path}/{inner_new}" if inner_new else sub_path,
                    "diff": sub_diff.get("diff") or "",
                    "new_file": sub_diff.get("new_file", False),
                    "deleted_file": sub_diff.get("deleted_file", False),
                    "renamed_file": sub_diff.get("renamed_file", False),
                })
        return expanded

    def is_supported(self, capability: str) -> bool:
        """Return False for the capabilities listed as unsupported below, True for anything else."""
        # gfm_markdown is supported in gitlab !
        unsupported = (
            'get_issue_comments',
            'create_inline_comment',
            'publish_inline_comments',
            'publish_file_comments',
        )
        return capability not in unsupported

    def _get_project_path_from_pr_or_issue_url(self, pr_or_issue_url: str) -> str:
        repo_project_path = None
        if 'issues' in pr_or_issue_url:
            #replace 'issues' with 'merge_requests', since gitlab provider does not support issue urls, just to get the git repo url:
            pr_or_issue_url = pr_or_issue_url.replace('issues', 'merge_requests')
        if 'merge_requests' in pr_or_issue_url:
            repo_project_path, _ = self._parse_merge_request_url(pr_or_issue_url)
        if not repo_project_path:
            get_logger().error(f"url is not a valid merge requests url: {pr_or_issue_url}")
            return ""
        return repo_project_path

    def get_git_repo_url(self, issues_or_pr_url: str) -> str:
        """
        Build the '.git' clone URL of the repository an issue/MR URL belongs to.

        Returns:
            Everything in the URL up to and including the project path, followed by
            '.git'; "" (after logging an error) when the project path cannot be
            determined or does not occur in the URL.
        """
        repo_path = self._get_project_path_from_pr_or_issue_url(issues_or_pr_url)
        if repo_path and repo_path in issues_or_pr_url:
            # Keep the scheme/host part that precedes the first occurrence of the path
            url_prefix, _, _ = issues_or_pr_url.partition(repo_path)
            return f"{url_prefix}{repo_path}.git"
        get_logger().error(f"Unable to retrieve project path from url: {issues_or_pr_url}")
        return ""

    # Given a git repo url, return prefix and suffix of the provider in order to view a given file belonging to that repo.
    # Example: https://gitlab.com/codiumai/pr-agent.git and branch: t1 -> prefix: "https://gitlab.com/codiumai/pr-agent/-/blob/t1", suffix: "?ref_type=heads"
    # In case git url is not provided, provider will use PR context (which includes branch) to determine the prefix and suffix.
    def get_canonical_url_parts(self, repo_git_url:str=None, desired_branch:str=None) -> Tuple[str, str]:
        """
        Return the (prefix, suffix) pair used to build a web link to a file in a repository.

        Args:
            repo_git_url: Clone URL of the repository (https, ssh or scp-like). When
                omitted, the PR URL held by this provider is used as context and the
                branch is replaced by the project's default branch.
            desired_branch: Branch to link to; only honoured when repo_git_url is given.

        Returns:
            (prefix, suffix), e.g. ("https://gitlab.com/codiumai/pr-agent/-/blob/t1",
            "?ref_type=heads"), or ("", "") when no context is available or the
            default branch cannot be retrieved.
        """
        repo_path = ""
        if not repo_git_url and not self.pr_url:
            get_logger().error("Cannot get canonical URL parts: missing either context PR URL or a repo GIT URL")
            return ("", "")
        if not repo_git_url: #Use PR url as context
            repo_path = self._get_project_path_from_pr_or_issue_url(self.pr_url)
            try:
                desired_branch = self.gl.projects.get(self.id_project).default_branch
            except Exception as e:
                get_logger().exception(f"Cannot get PR: {self.pr_url} default branch. Tried project ID: {self.id_project}")
                return ("", "")
        else: #Use repo git url
            base_url = (self.gitlab_url or "").rstrip('/')
            if base_url and repo_git_url.startswith(f"{base_url}/"):
                # Same server as this provider: the project path is whatever follows the base URL
                repo_path = repo_git_url[len(base_url) + 1:]
            elif "://" not in repo_git_url and "@" in repo_git_url and ":" in repo_git_url:
                # scp-like syntax (user@host:group/repo.git)
                repo_path = repo_git_url.split(":", 1)[1]
            else:
                repo_path = urlparse(repo_git_url).path
            repo_path = repo_path.strip('/')
            # Remove only a trailing '.git'; host names may legitimately contain '.git'
            if repo_path.endswith('.git'):
                repo_path = repo_path[:-4]
        prefix = f"{self.gitlab_url}/{repo_path}/-/blob/{desired_branch}"
        suffix = "?ref_type=heads"  # gitlab cloud adds this suffix. gitlab server does not, but it is harmless.
        return (prefix, suffix)

    @property
    def pr(self):
        """Alias for self.mr: GitLab's term is merge request (MR) rather than pull request (PR)."""
        return self.mr

    def _set_merge_request(self, merge_request_url: str):
        self.id_project, self.id_mr = self._parse_merge_request_url(merge_request_url)
        self.mr = self._get_merge_request()
        try:
            self.last_diff = self.mr.diffs.list(get_all=True)[-1]
        except IndexError as e:
            get_logger().error(f"Could not get diff for merge request {self.id_mr}")
            raise DiffNotFoundError(f"Could not get diff for merge request {self.id_mr}") from e

    def get_pr_file_content(self, file_path: str, branch: str) -> str:
        """
        Read a file from the project at the given ref.

        Returns:
            The file content passed through decode_if_bytes, or '' when the file
            does not exist at that ref or any other error occurs.
        """
        try:
            project = self.gl.projects.get(self.id_project)
            raw_content = project.files.get(file_path, branch).decode()
            return decode_if_bytes(raw_content)
        except GitlabGetError:
            # In case of file creation the method returns GitlabGetError (404 file not found).
            # In this case we return an empty string for the diff.
            return ''
        except Exception as e:
            get_logger().warning(f"Error retrieving file {file_path} from branch {branch}: {e}")
            return ''

    def create_or_update_pr_file(self, file_path: str, branch: str, contents="", message="") -> None:
        """
        Write a file to a branch of the project, updating it when it already
        exists and creating it otherwise.

        Args:
            file_path: Path of the file inside the repository.
            branch: Branch to commit to.
            contents: New file content.
            message: Commit message; when empty, "Update <path>" is used if
                contents is non-empty and "Create <path>" otherwise.

        Raises:
            Any exception from the GitLab client is logged and re-raised.
        """
        try:
            project = self.gl.projects.get(self.id_project)

            if not message:
                verb = "Update" if contents else "Create"
                message = f"{verb} {file_path}"

            try:
                current_file = project.files.get(file_path, branch)
                current_file.content = contents
                current_file.save(branch=branch, commit_message=message)
                get_logger().debug(f"Updated file {file_path} in branch {branch}")
            except GitlabGetError:
                # Lookup failed, so create the file instead of updating it
                new_file_payload = {
                    'file_path': file_path,
                    'branch': branch,
                    'content': contents,
                    'commit_message': message
                }
                project.files.create(new_file_payload)
                get_logger().debug(f"Created file {file_path} in branch {branch}")
        except GitlabAuthenticationError as e:
            get_logger().error(f"Authentication failed while creating/updating file {file_path} in branch {branch}: {e}")
            raise
        except (GitlabCreateError, GitlabUpdateError) as e:
            get_logger().error(f"Permission denied or validation error for file {file_path} in branch {branch}: {e}")
            raise
        except Exception as e:
            get_logger().exception(f"Unexpected error creating/updating file {file_path} in branch {branch}: {e}")
            raise

    def get_diff_files(self) -> list[FilePatchInfo]:
        """
        Retrieves the list of files that have been modified, added, deleted, or renamed in a pull request in GitLab,
        along with their content and patch information.

        The result is stored in self.diff_files; later calls return that stored list as long as it is non-empty.

        Returns:
            diff_files (List[FilePatchInfo]): List of FilePatchInfo objects representing the modified, added, deleted,
            or renamed files in the merge request.
        """

        # Serve the cached result when a previous call already populated it
        if self.diff_files:
            return self.diff_files

        # filter files using [ignore] patterns
        raw_changes = self.mr.changes().get('changes', [])
        # Submodule pointer bumps may be expanded into per-file entries (no-op when disabled)
        raw_changes = self._expand_submodule_changes(raw_changes)
        diffs_original = raw_changes
        diffs = filter_ignored(diffs_original, 'gitlab')
        if diffs != diffs_original:
            # Logging only; a malformed entry must not abort diff retrieval
            try:
                names_original = [diff['new_path'] for diff in diffs_original]
                names_filtered = [diff['new_path'] for diff in diffs]
                get_logger().info(f"Filtered out [ignore] files for merge request {self.id_mr}", extra={
                    'original_files': names_original,
                    'filtered_files': names_filtered
                })
            except Exception as e:
                pass

        diff_files = []
        invalid_files_names = []
        counter_valid = 0
        for diff in diffs:
            # Entries rejected by is_valid_file are only reported, never returned
            if not is_valid_file(diff['new_path']):
                invalid_files_names.append(diff['new_path'])
                continue

            # allow only a limited number of files to be fully loaded. We can manage the rest with diffs only
            counter_valid += 1
            # Files without a diff text are always fully loaded, because the patch is rebuilt from content below
            if counter_valid < MAX_FILES_ALLOWED_FULL or not diff['diff']:
                original_file_content_str = self.get_pr_file_content(diff['old_path'], self.mr.diff_refs['base_sha'])
                new_file_content_str = self.get_pr_file_content(diff['new_path'], self.mr.diff_refs['head_sha'])
            else:
                # Log once, at the moment the limit is reached
                if counter_valid == MAX_FILES_ALLOWED_FULL:
                    get_logger().info(f"Too many files in PR, will avoid loading full content for rest of files")
                original_file_content_str = ''
                new_file_content_str = ''

            # Ensure content is properly decoded
            original_file_content_str = decode_if_bytes(original_file_content_str)
            new_file_content_str = decode_if_bytes(new_file_content_str)

            # First matching flag wins: new_file, then deleted_file, then renamed_file
            edit_type = EDIT_TYPE.MODIFIED
            if diff['new_file']:
                edit_type = EDIT_TYPE.ADDED
            elif diff['deleted_file']:
                edit_type = EDIT_TYPE.DELETED
            elif diff['renamed_file']:
                edit_type = EDIT_TYPE.RENAMED

            filename = diff['new_path']
            patch = diff['diff']
            if not patch:
                # NOTE(review): load_large_diff presumably rebuilds a patch from the two file contents - confirm
                patch = load_large_diff(filename, new_file_content_str, original_file_content_str)


            # count number of lines added and removed
            # NOTE(review): any line starting with '+'/'-' is counted, which would include
            # '+++'/'---' header lines if the patch contains them
            patch_lines = patch.splitlines(keepends=True)
            num_plus_lines = len([line for line in patch_lines if line.startswith('+')])
            num_minus_lines = len([line for line in patch_lines if line.startswith('-')])
            diff_files.append(
                FilePatchInfo(original_file_content_str, new_file_content_str,
                              patch=patch,
                              filename=filename,
                              edit_type=edit_type,
                              # old_filename is only set when the path actually changed
                              old_filename=None if diff['old_path'] == diff['new_path'] else diff['old_path'],
                              num_plus_lines=num_plus_lines,
                              num_minus_lines=num_minus_lines, ))
        if invalid_files_names:
            get_logger().info(f"Filtered out files with invalid extensions: {invalid_files_names}")

        self.diff_files = diff_files
        return diff_files

    def get_files(self) -> list:
        """
        Return the new paths of all changed files in the merge request
        (after optional submodule expansion), caching them in self.git_files.
        Entries without a 'new_path' are skipped.
        """
        if self.git_files:
            return self.git_files
        changes = self._expand_submodule_changes(self.mr.changes().get('changes', []))
        new_paths = (change.get('new_path') for change in changes)
        self.git_files = [path for path in new_paths if path]
        return self.git_files

    def publish_description(self, pr_title: str, pr_body: str):
        """Set the merge request's title and description and save it; failures are logged, not raised."""
        try:
            merge_request = self.mr
            merge_request.title = pr_title
            merge_request.description = pr_body
            merge_request.save()
        except Exception as e:
            get_logger().exception(f"Could not update merge request {self.id_mr} description: {e}")

    def get_latest_commit_url(self):
        """
        Return the web URL of the first commit yielded by the merge request's
        commit listing, or "" when there are no commits or an error occurs.
        """
        try:
            commit_listing = self.mr.commits()
            first_commit = commit_listing.next()
            return first_commit.web_url
        except StopIteration: # no commits
            return ""
        except Exception as e:
            get_logger().exception(f"Could not get latest commit URL: {e}")
            return ""

    def get_comment_url(self, comment):
        """Return the merge request's web URL with a '#note_<id>' anchor for the given comment."""
        mr_url = self.mr.web_url
        return f"{mr_url}#note_{comment.id}"

    def publish_persistent_comment(self, pr_comment: str,
                                   initial_header: str,
                                   update_header: bool = True,
                                   name='review',
                                   final_update_message=True):
        """Forward all arguments, positionally and unchanged, to publish_persistent_comment_full."""
        forwarded_args = (pr_comment, initial_header, update_header, name, final_update_message)
        self.publish_persistent_comment_full(*forwarded_args)

    def publish_comment(self, mr_comment: str, is_temporary: bool = False):
        """
        Post a note on the merge request.

        Temporary comments are skipped (returning None) when the
        publish_output_progress setting is off; otherwise they are also recorded
        in self.temp_comments. The body is passed through limit_output_characters
        with self.max_comment_chars before posting.

        Returns:
            The created note object, or None when skipped.
        """
        if is_temporary and not get_settings().config.publish_output_progress:
            get_logger().debug(f"Skipping publish_comment for temporary comment: {mr_comment}")
            return None
        clipped_body = self.limit_output_characters(mr_comment, self.max_comment_chars)
        note = self.mr.notes.create({'body': clipped_body})
        if is_temporary:
            self.temp_comments.append(note)
        return note

    def edit_comment(self, comment, body: str):
        """Replace the body of an existing note with the length-limited text."""
        payload = {'body': self.limit_output_characters(body, self.max_comment_chars)}
        self.mr.notes.update(comment.id, payload)

    def edit_comment_from_comment_id(self, comment_id: int, body: str):
        """Fetch the note with the given id, set its body to the length-limited text and save it."""
        clipped_body = self.limit_output_characters(body, self.max_comment_chars)
        note = self.mr.notes.get(comment_id)
        note.body = clipped_body
        note.save()

    def reply_to_comment_from_comment_id(self, comment_id: int, body: str):
        """
        Add a length-limited reply note to a discussion of the merge request.

        NOTE(review): comment_id is handed to discussions.get(), so it is
        presumably expected to be a discussion id - confirm against callers.
        """
        reply_body = self.limit_output_characters(body, self.max_comment_chars)
        thread = self.mr.discussions.get(comment_id)
        thread.notes.create({'body': reply_body})

    def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, original_suggestion=None):
        """
        Locate relevant_line_in_file inside the diff of relevant_file and post the
        length-limited body as an inline comment at that position.
        """
        body = self.limit_output_characters(body, self.max_comment_chars)
        location = self.search_line(relevant_file, relevant_line_in_file)
        edit_type, found, source_line_no, target_file, target_line_no = location
        self.send_inline_comment(body, edit_type, found, relevant_file, relevant_line_in_file, source_line_no,
                                 target_file, target_line_no, original_suggestion)

    def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, absolute_position: int = None):
        """Not implemented for this provider; always raises NotImplementedError."""
        raise NotImplementedError("Gitlab provider does not support creating inline comments yet")

    def create_inline_comments(self, comments: list[dict]):
        """Not implemented for this provider; always raises NotImplementedError."""
        raise NotImplementedError("Gitlab provider does not support publishing inline comments yet")

    def get_comment_body_from_comment_id(self, comment_id: int):
        """Return the body text of the merge request note with the given id."""
        note = self.mr.notes.get(comment_id)
        return note.body

    def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_file: str,
                            relevant_line_in_file: str,
                            source_line_no: int, target_file: str, target_line_no: int,
                            original_suggestion=None) -> None:
        """
        Post an inline discussion on the merge request at the given position.

        When the positioned discussion cannot be created, a fallback note is built
        from original_suggestion (summary, link to the lines, and a diff of the
        old/new snippets) and posted instead. Failures of the fallback are logged
        and swallowed.

        Args:
            body: Comment text for the inline discussion.
            edit_type: 'deletion', 'addition', or anything else (treated as a context line).
            found: Whether the target line was located; when False only an info log is written.
            relevant_file: File path used to select the diff and build the fallback link.
            relevant_line_in_file: Line text used to select the diff.
            source_line_no: Old-side line number; 1 is subtracted before use.
            target_file: Object exposing .filename and .old_filename (annotated str,
                but attribute access below requires a file-info object).
            target_line_no: New-side line number; 1 is subtracted before use.
            original_suggestion: Suggestion dict used only by the fallback path.

        Raises:
            DiffNotFoundError: when no diff can be obtained for the merge request.
        """
        if not found:
            get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}")
        else:
            # in order to have exact sha's we have to find correct diff for this change
            diff = self.get_relevant_diff(relevant_file, relevant_line_in_file)
            if diff is None:
                get_logger().error(f"Could not get diff for merge request {self.id_mr}")
                raise DiffNotFoundError(f"Could not get diff for merge request {self.id_mr}")
            pos_obj = {'position_type': 'text',
                       'new_path': target_file.filename,
                       'old_path': target_file.old_filename if target_file.old_filename else target_file.filename,
                       'base_sha': diff.base_commit_sha, 'start_sha': diff.start_commit_sha, 'head_sha': diff.head_commit_sha}
            # Callers pass line numbers that are one past the intended line (find_in_file advances
            # its counters before testing the line; publish_code_suggestions passes start + 1)
            if edit_type == 'deletion':
                pos_obj['old_line'] = source_line_no - 1
            elif edit_type == 'addition':
                pos_obj['new_line'] = target_line_no - 1
            else:
                # Context lines are addressed on both sides of the diff
                pos_obj['new_line'] = target_line_no - 1
                pos_obj['old_line'] = source_line_no - 1
            get_logger().debug(f"Creating comment in MR {self.id_mr} with body {body} and position {pos_obj}")
            try:
                self.mr.discussions.create({'body': body, 'position': pos_obj})
            except Exception as e:
                try:
                    # fallback - create a general note on the file in the MR
                    # Two suggestion schemas are supported, told apart by 'suggestion_orig_location'.
                    # NOTE(review): original_suggestion defaults to None, in which case the 'in' test
                    # raises and the fallback ends in the except branch below.
                    if 'suggestion_orig_location' in original_suggestion:
                        line_start = original_suggestion['suggestion_orig_location']['start_line']
                        line_end = original_suggestion['suggestion_orig_location']['end_line']
                        old_code_snippet = original_suggestion['prev_code_snippet']
                        new_code_snippet = original_suggestion['new_code_snippet']
                        content = original_suggestion['suggestion_summary']
                        label = original_suggestion['category']
                        if 'score' in original_suggestion:
                            score = original_suggestion['score']
                        else:
                            score = 7
                    else:
                        line_start = original_suggestion['relevant_lines_start']
                        line_end = original_suggestion['relevant_lines_end']
                        old_code_snippet = original_suggestion['existing_code']
                        new_code_snippet = original_suggestion['improved_code']
                        content = original_suggestion['suggestion_content']
                        label = original_suggestion['label']
                        score = original_suggestion.get('score', 7)

                    # NOTE(review): 'language' is computed but not used further in this method
                    if hasattr(self, 'main_language'):
                        language = self.main_language
                    else:
                        language = ''
                    link = self.get_line_link(relevant_file, line_start, line_end)
                    body_fallback =f"**Suggestion:** {content} [{label}, importance: {score}]\n\n"
                    body_fallback +=f"\n\n<details><summary>[{target_file.filename} [{line_start}-{line_end}]]({link}):</summary>\n\n"
                    body_fallback += f"\n\n___\n\n`(Cannot implement directly - GitLab API allows committable suggestions strictly on MR diff lines)`"
                    body_fallback+="</details>\n\n"
                    # n=999 context lines so the whole snippet appears in a single hunk
                    diff_patch = difflib.unified_diff(old_code_snippet.split('\n'),
                                                new_code_snippet.split('\n'), n=999)
                    patch_orig = "\n".join(diff_patch)
                    # Skip the first five lines of the joined output to drop the unified-diff header
                    patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n')
                    diff_code = f"\n\n```diff\n{patch.rstrip()}\n```"
                    body_fallback += diff_code

                    # Create a general note on the file in the MR
                    self.mr.notes.create({
                        'body': body_fallback,
                        'position': {
                            'base_sha': diff.base_commit_sha,
                            'start_sha': diff.start_commit_sha,
                            'head_sha': diff.head_commit_sha,
                            'position_type': 'text',
                            'file_path': f'{target_file.filename}',
                        }
                    })
                    get_logger().debug(f"Created fallback comment in MR {self.id_mr} with position {pos_obj}")

                    # get_logger().debug(
                    #     f"Failed to create comment in MR {self.id_mr} with position {pos_obj} (probably not a '+' line)")
                except Exception as e:
                    get_logger().exception(f"Failed to create comment in MR {self.id_mr}")

    def get_relevant_diff(self, relevant_file: str, relevant_line_in_file: str) -> Optional[dict]:
        """
        Pick the merge request diff version to anchor an inline comment to.

        Returns the first entry of the merge request's diff list when some change
        has new_path == relevant_file and contains relevant_line_in_file in its
        diff text; otherwise falls back to self.last_diff. Returns None when the
        merge request reports no changes or no diffs.

        NOTE(review): the match test inside the loop does not depend on the loop
        variable `diff`, so a match always returns the first listed diff and a
        miss logs once per diff version - confirm whether a per-version check
        was intended.
        """
        _changes = self.mr.changes()  # dict
        # Include expanded submodule entries so lines inside submodules can match too
        _changes['changes'] = self._expand_submodule_changes(_changes.get('changes', []))
        changes = _changes
        if not changes:
            get_logger().error('No changes found for the merge request.')
            return None
        all_diffs = self.mr.diffs.list(get_all=True)
        if not all_diffs:
            get_logger().error('No diffs found for the merge request.')
            return None
        for diff in all_diffs:
            for change in changes['changes']:
                if change['new_path'] == relevant_file and relevant_line_in_file in change['diff']:
                    return diff
            get_logger().debug(
                f'No relevant diff found for {relevant_file} {relevant_line_in_file}. Falling back to last diff.')
        return self.last_diff  # fallback to last_diff if no relevant diff is found

    def publish_code_suggestions(self, code_suggestions: list) -> bool:
        """
        Publish each code suggestion as an inline comment on the new side of the diff.

        Each suggestion needs 'body', 'relevant_file', 'relevant_lines_start' and
        'relevant_lines_end'; an optional 'original_suggestion' entry is forwarded
        for the fallback note. The '```suggestion' fence in the body is rewritten
        to '```suggestion:-0+<n>' where n = end - start.

        Returns:
            Always True; a failing suggestion is logged and the rest are still published.
        """
        for suggestion in code_suggestions:
            try:
                carries_original = bool(suggestion) and 'original_suggestion' in suggestion
                original_suggestion = suggestion['original_suggestion'] if carries_original else suggestion
                body = suggestion['body']
                relevant_file = suggestion['relevant_file']
                relevant_lines_start = suggestion['relevant_lines_start']
                relevant_lines_end = suggestion['relevant_lines_end']

                # First diff file whose name matches; None makes the lookup below fail into the except
                target_file = next(
                    (candidate for candidate in self.get_diff_files() if candidate.filename == relevant_file),
                    None,
                )
                extra_lines = relevant_lines_end - relevant_lines_start # no need to add 1
                body = body.replace('```suggestion', f'```suggestion:-0+{extra_lines}')
                head_lines = target_file.head_file.splitlines()
                relevant_line_in_file = head_lines[relevant_lines_start - 1]

                # for code suggestions, we want to edit the new code
                self.send_inline_comment(body, 'addition', True, relevant_file, relevant_line_in_file, -1,
                                         target_file, relevant_lines_start + 1, original_suggestion)
            except Exception as e:
                get_logger().exception(f"Could not publish code suggestion:\nsuggestion: {suggestion}\nerror: {e}")

        # note that we publish suggestions one-by-one. so, if one fails, the rest will still be published
        return True

    def publish_file_comments(self, file_comments: list) -> bool:
        """
        No-op for this provider.

        NOTE(review): annotated as returning bool, but the body returns None.
        """
        pass

    def search_line(self, relevant_file, relevant_line_in_file):
        """
        Locate a line inside the diff of the given file.

        Args:
            relevant_file: Path of the file, compared with each diff file's filename.
            relevant_line_in_file: Text of the line to look for.

        Returns:
            (edit_type, found, source_line_no, target_file, target_line_no) as
            produced by find_in_file. When relevant_file is not among the diff
            files, found is False, target_file is None, both line numbers are 0
            and edit_type is derived from the line's leading character.
        """
        target_file = None
        # Defaults for the case where no diff file matches; without them the
        # return statement below would reference unbound names.
        found = False
        source_line_no = 0
        target_line_no = 0

        edit_type = self.get_edit_type(relevant_line_in_file)
        for file in self.get_diff_files():
            if file.filename == relevant_file:
                edit_type, found, source_line_no, target_file, target_line_no = self.find_in_file(file,
                                                                                                  relevant_line_in_file)
        return edit_type, found, source_line_no, target_file, target_line_no

    def find_in_file(self, file, relevant_line_in_file):
        """
        Scan a file's patch for the first line containing relevant_line_in_file.

        Old/new line counters are reset at every hunk header and advanced for each
        patch line *before* that line is tested, so the returned numbers are one
        past the matched line. A search text starting with '+' is also matched
        without that '+' (and following whitespace).

        Returns:
            (edit_type, found, source_line_no, target_file, target_line_no), where
            target_file is the file passed in and edit_type is 'context' when
            nothing matched.
        """
        old_cursor = 0
        new_cursor = 0
        for patch_line in file.patch.splitlines():
            if patch_line.startswith('@@'):
                header = self.RE_HUNK_HEADER.match(patch_line)
                if header:
                    start_old, _size_old, start_new, _size_new, _section = header.groups()
                    old_cursor = int(start_old)
                    new_cursor = int(start_new)
                continue

            marker = patch_line[:1]
            if marker == '-':
                old_cursor += 1
            elif marker == '+':
                new_cursor += 1
            elif marker == ' ':
                old_cursor += 1
                new_cursor += 1

            # The model often adds a '+' to the beginning of the relevant_line_in_file even if originally
            # it's a context line, hence the second, '+'-stripped comparison.
            if relevant_line_in_file in patch_line or (
                    relevant_line_in_file[0] == '+' and relevant_line_in_file[1:].lstrip() in patch_line):
                return self.get_edit_type(patch_line), True, old_cursor, file, new_cursor
        return 'context', False, old_cursor, file, new_cursor

    def get_edit_type(self, relevant_line_in_file):
        """
        Classify a diff line by its leading character.

        Returns:
            'deletion' for a line starting with '-', 'addition' for '+', and
            'context' for anything else, including an empty line.
        """
        # startswith() is safe on an empty string, whereas indexing [0] raises IndexError
        if relevant_line_in_file.startswith('-'):
            return 'deletion'
        if relevant_line_in_file.startswith('+'):
            return 'addition'
        return 'context'

    def remove_initial_comment(self):
        """Remove every comment recorded in self.temp_comments; failures are logged, not raised."""
        try:
            for temporary_comment in self.temp_comments:
                self.remove_comment(temporary_comment)
        except Exception as e:
            get_logger().exception(f"Failed to remove temp comments, error: {e}")

    def remove_comment(self, comment):
        """Delete the given comment object; a failure is logged, not raised."""
        try:
            comment.delete()
        except Exception as e:
            get_logger().exception(f"Failed to remove comment, error: {e}")

    def get_title(self):
        """Return the merge request's title."""
        merge_request = self.mr
        return merge_request.title

    def get_languages(self):
        """Return the result of the project's languages() call for this provider's project."""
        project = self.gl.projects.get(self.id_project)
        return project.languages()

    def get_pr_branch(self):
        """Return the merge request's source branch."""
        merge_request = self.mr
        return merge_request.source_branch

    def get_pr_owner_id(self) -> str | None:
        if not self.gitlab_url or 'gitlab.com' in self.gitlab_url:
            if not self.id_project:
                return None
            return self.id_project.split('/')[0]
        # extract host name
        host = urlparse(self.gitlab_url).hostname
        return host

    def get_pr_description_full(self):
        """Return the merge request's description as stored on the MR object."""
        merge_request = self.mr
        return merge_request.description

    def get_issue_comments(self):
        """Return all notes of the merge request, in the reverse of the order the API lists them."""
        all_notes = self.mr.notes.list(get_all=True)
        return all_notes[::-1]

    def get_repo_settings(self):
        """
        Read '.pr_agent.toml' from the project's default branch.

        Returns:
            The decoded file content as returned by the file object's decode(),
            or "" when the project, branch or file cannot be read.
        """
        try:
            # Fetch the project once and reuse it for both the branch name and the file lookup
            project = self.gl.projects.get(self.id_project)
            main_branch = project.default_branch
            contents = project.files.get(file_path='.pr_agent.toml', ref=main_branch).decode()
            return contents
        except Exception:
            return ""

    def get_workspace_name(self):
        """Return the first '/'-separated segment of the project path."""
        top_level, *_ = self.id_project.split('/')
        return top_level

    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:
        """
        Add an 'eyes' award emoji to a note of the merge request.

        Args:
            issue_comment_id: ID of the note to react to.
            disable_eyes: When True nothing is done.

        Returns:
            The id of the created award emoji, or None when disabled, when no
            merge request id is set, when the note is not found, or on any error.
        """
        if disable_eyes:
            return None
        try:
            if not self.id_mr:
                get_logger().warning("Cannot add eyes reaction: merge request ID is not set.")
                return None

            project = self.gl.projects.get(self.id_project)
            note = project.mergerequests.get(self.id_mr).notes.get(issue_comment_id)

            if not note:
                get_logger().warning(f"Comment with ID {issue_comment_id} not found in merge request {self.id_mr}.")
                return None

            created_emoji = note.awardemojis.create({
                'name': 'eyes'
            })
            return created_emoji.id
        except Exception as e:
            get_logger().warning(f"Failed to add eyes reaction, error: {e}")
            return None

    def remove_reaction(self, issue_comment_id: int, reaction_id: str) -> bool:
        """
        Remove an award emoji from a note of the merge request.

        Args:
            issue_comment_id: ID of the note carrying the reaction.
            reaction_id: Either the award-emoji id (the value add_eyes_reaction
                returns, as int or string) or the emoji name (e.g. 'eyes').

        Returns:
            True when a matching reaction was deleted; False when no merge request
            id is set, the note or reaction is not found, or any error occurs.
        """
        try:
            if not self.id_mr:
                get_logger().warning("Cannot remove reaction: merge request ID is not set.")
                return False

            mr = self.gl.projects.get(self.id_project).mergerequests.get(self.id_mr)
            comment = mr.notes.get(issue_comment_id)

            if not comment:
                get_logger().warning(f"Comment with ID {issue_comment_id} not found in merge request {self.id_mr}.")
                return False

            reactions = comment.awardemojis.list()
            for reaction in reactions:
                # Match by id first (what add_eyes_reaction hands back), comparing as
                # strings so int and str ids both work; keep matching by name as before.
                matches_id = reaction_id is not None and str(getattr(reaction, 'id', '')) == str(reaction_id)
                if matches_id or reaction.name == reaction_id:
                    reaction.delete()
                    return True

            get_logger().warning(f"Reaction '{reaction_id}' not found in comment {issue_comment_id}.")
            return False
        except Exception as e:
            get_logger().warning(f"Failed to remove reaction, error: {e}")
            return False

    def _parse_merge_request_url(self, merge_request_url: str) -> Tuple[str, int]:
        parsed_url = urlparse(merge_request_url)

        path_parts = parsed_url.path.strip('/').split('/')
        if 'merge_requests' not in path_parts:
            raise ValueError("The provided URL does not appear to be a GitLab merge request URL")

        mr_index = path_parts.index('merge_requests')
        # Ensure there is an ID after 'merge_requests'
        if len(path_parts) <= mr_index + 1:
            raise ValueError("The provided URL does not contain a merge request ID")

        try:
            mr_id = int(path_parts[mr_index + 1])
        except ValueError as e:
            raise ValueError("Unable to convert merge request ID to integer") from e

        # Handle special delimiter (-)
        project_path = "/".join(path_parts[:mr_index])
        if project_path.endswith('/-'):
            project_path = project_path[:-2]

        # Return the path before 'merge_requests' and the ID
        return project_path, mr_id

    def _get_merge_request(self):
        mr = self.gl.projects.get(self.id_project).mergerequests.get(self.id_mr)
        return mr

    def get_user_id(self):
        """Return the user ID; this provider does not expose one, so the result is always None."""
        user_id = None
        return user_id

    def publish_labels(self, pr_types):
        """
        Replace the merge request's labels with the de-duplicated ``pr_types`` and save it.

        Failures are logged as warnings and are not propagated to the caller.
        """
        try:
            unique_labels = set(pr_types)
            self.mr.labels = list(unique_labels)
            self.mr.save()
        except Exception as e:
            get_logger().warning(f"Failed to publish labels, error: {e}")

    def publish_inline_comments(self, comments: list[dict]):
        """Accept a batch of inline comments and do nothing with them (no-op in this provider)."""
        return None

    def get_pr_labels(self, update=False):
        """
        Return the labels stored on the merge request object.

        The ``update`` flag is accepted but not used here: the labels are always read
        from ``self.mr`` as it currently is.
        """
        current_labels = self.mr.labels
        return current_labels

    def get_repo_labels(self):
        """Return the result of listing the labels of project ``self.id_project``."""
        project = self.gl.projects.get(self.id_project)
        return project.labels.list()

    def get_commit_messages(self):
        """
        Build a numbered, newline-separated listing of the merge request's commit messages.

        Returns:
            str: Lines of the form ``"<n>. <message>"`` (numbering starts at 1). An empty
            string is used when the commits cannot be read. When the
            ``CONFIG.MAX_COMMITS_TOKENS`` setting is truthy, the text is passed through
            ``clip_tokens`` with that limit.
        """
        token_limit = get_settings().get("CONFIG.MAX_COMMITS_TOKENS", None)
        try:
            messages = [commit['message'] for commit in self.mr.commits()._list]
            numbered_lines = []
            for number, message in enumerate(messages, start=1):
                numbered_lines.append(f"{number}. {message}")
            commit_messages_str = "\n".join(numbered_lines)
        except Exception:
            # Any failure while reading commits degrades to an empty listing
            commit_messages_str = ""

        if not token_limit:
            return commit_messages_str
        return clip_tokens(commit_messages_str, token_limit)

    def get_pr_id(self):
        """
        Return an identifier for the merge request, which is its web URL.

        Returns:
            str: ``self.mr.web_url``, or an empty string when it cannot be read
            (for example when the merge request object is missing).
        """
        try:
            return self.mr.web_url
        except Exception:
            # Catch ordinary errors only: a bare `except:` would also swallow
            # KeyboardInterrupt / SystemExit and hide a shutdown request.
            return ""

    def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str:
        """
        Build a blob URL on the merge request's source branch for a file, optionally with a line anchor.

        Args:
            relevant_file: Path of the file inside the repository.
            relevant_line_start: First line to anchor to; ``-1`` yields a link without an anchor.
            relevant_line_end: Last line of the range; when falsy only the start line is anchored.

        Returns:
            str: The file URL, suffixed with ``#L<start>-<end>`` or ``#L<start>`` when applicable.
        """
        blob_url = f"{self.gl.url}/{self.id_project}/-/blob/{self.mr.source_branch}/{relevant_file}?ref_type=heads"
        if relevant_line_start == -1:
            return blob_url
        if relevant_line_end:
            return f"{blob_url}#L{relevant_line_start}-{relevant_line_end}"
        return f"{blob_url}#L{relevant_line_start}"


    def generate_link_to_relevant_line_number(self, suggestion) -> str:
        """
        Build a link to the line of a suggestion inside the file on the source branch.

        Args:
            suggestion: Mapping with ``relevant_file`` and ``relevant_line`` string entries.

        Returns:
            str: A blob URL anchored at the located line, or an empty string when the
            relevant line is blank, cannot be located, or an error occurs.
        """
        try:
            # Strip surrounding backticks / single quotes and trailing whitespace from the file name
            file_path = suggestion['relevant_file'].strip('`').strip("'").rstrip()
            line_text = suggestion['relevant_line'].rstrip()
            if not line_text:
                return ""

            _position, line_number = find_line_number_of_relevant_line_in_file(
                self.diff_files, file_path, line_text)

            if line_number != -1:
                # link to right file only
                # (an alternative that links into the diff view instead:)
                # sha_file = hashlib.sha1(relevant_file.encode('utf-8')).hexdigest()
                # link = f"{self.pr.web_url}/diffs#{sha_file}_{absolute_position}_{absolute_position}"
                return f"{self.gl.url}/{self.id_project}/-/blob/{self.mr.source_branch}/{file_path}?ref_type=heads#L{line_number}"
        except Exception as e:
            if get_settings().config.verbosity_level >= 2:
                get_logger().info(f"Failed adding line link, error: {e}")

        return ""
    #Clone related
    def _prepare_clone_url_with_token(self, repo_url_to_clone: str) -> str | None:
        if "gitlab." not in repo_url_to_clone:
            get_logger().error(f"Repo URL: {repo_url_to_clone} is not a valid gitlab URL.")
            return None
        (scheme, base_url) = repo_url_to_clone.split("gitlab.")
        access_token = getattr(self.gl, 'oauth_token', None) or getattr(self.gl, 'private_token', None)
        if not all([scheme, access_token, base_url]):
            get_logger().error(f"Either no access token found, or repo URL: {repo_url_to_clone} "
                               f"is missing prefix: {scheme} and/or base URL: {base_url}.")
            return None

        #Note that the ""official"" method found here:
        # https://docs.gitlab.com/user/profile/personal_access_tokens/#clone-repository-using-personal-access-token
        # requires a username, which may not be applicable.
        # The following solution is taken from: https://stackoverflow.com/questions/25409700/using-gitlab-token-to-clone-without-authentication/35003812#35003812
        # For example: For repo url: https://gitlab.codium-inc.com/qodo/autoscraper.git
        # Then to clone one will issue: 'git clone https://oauth2:<access token>@gitlab.codium-inc.com/qodo/autoscraper.git'

        clone_url = f"{scheme}oauth2:{access_token}@gitlab.{base_url}"
        return clone_url


================================================
FILE: pr_agent/git_providers/local_git_provider.py
================================================
from collections import Counter
from pathlib import Path
from typing import List

from git import Repo

from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo
from pr_agent.config_loader import _find_repository_root, get_settings
from pr_agent.git_providers.git_provider import GitProvider
from pr_agent.log import get_logger


class PullRequestMimic:
    """
    Minimal stand-in for the PullRequest class of the PyGithub library, used by the LocalGitProvider.

    It only carries a title and the list of file patches that make up the mimicked PR.
    """

    def __init__(self, title: str, diff_files: List[FilePatchInfo]):
        # Store both values as plain attributes, exactly as they were passed in
        self.title, self.diff_files = title, diff_files


class LocalGitProvider(GitProvider):
    """
    This class implements the GitProvider interface for local git repositories.
    It mimics the PR functionality of the GitProvider interface,
    but does not require a hosted git repository.
    Instead of providing a PR url, the user provides a local branch path to generate a diff-patch.
    For the MVP it only supports the /review and /describe capabilities.
    """

    def __init__(self, target_branch_name, incremental=False):
        """
        Open the repository that contains the current working directory and prepare the PR mimic.

        Args:
            target_branch_name: Name of the local branch the current HEAD is compared against.
            incremental: Accepted but not used by this provider.

        Raises:
            ValueError: If no repository root is found or the working tree is dirty.
            KeyError: If ``target_branch_name`` is not a local branch.
        """
        self.repo_path = _find_repository_root()
        if self.repo_path is None:
            raise ValueError('Could not find repository root')
        self.repo = Repo(self.repo_path)
        self.head_branch_name = self.repo.head.ref.name
        self.target_branch_name = target_branch_name
        self._prepare_repo()
        self.diff_files = None
        self.pr = PullRequestMimic(self.get_pr_title(), self.get_diff_files())
        # Output locations: explicit settings win, otherwise files in the repository root
        description_path = get_settings().get('local.description_path')
        self.description_path = description_path if description_path is not None \
            else self.repo_path / 'description.md'
        review_path = get_settings().get('local.review_path')
        self.review_path = review_path if review_path is not None \
            else self.repo_path / 'review.md'
        # inline code comments are not supported for local git repositories
        get_settings().pr_reviewer.inline_code_comments = False

    def _prepare_repo(self):
        """
        Prepare the repository for PR-mimic generation.

        Raises:
            ValueError: If the repository has uncommitted changes.
            KeyError: If the target branch does not exist locally.
        """
        get_logger().debug('Preparing repository for PR-mimic generation...')
        if self.repo.is_dirty():
            raise ValueError('The repository is not in a clean state. Please commit or stash pending changes.')
        if self.target_branch_name not in self.repo.heads:
            raise KeyError(f'Branch: {self.target_branch_name} does not exist')

    def is_supported(self, capability: str) -> bool:
        """Return False for the capabilities this provider cannot offer, True for anything else."""
        if capability in ['get_issue_comments', 'create_inline_comment', 'publish_inline_comments', 'get_labels',
                          'gfm_markdown']:
            return False
        return True

    def get_diff_files(self) -> list[FilePatchInfo]:
        """
        Diff the HEAD commit against its merge base with the target branch and describe every changed file.

        The result is also cached on ``self.diff_files``.

        Returns:
            list[FilePatchInfo]: One entry per changed file with old content, new content,
            patch text, file name, edit type and (when it differs) the old file name.
        """
        diffs = self.repo.head.commit.diff(
            self.repo.merge_base(self.repo.head, self.repo.branches[self.target_branch_name]),
            create_patch=True,
            R=True
        )
        diff_files = []
        for diff_item in diffs:
            # NOTE(review): contents are decoded as UTF-8, so a non-UTF-8 (e.g. binary) blob
            # raises UnicodeDecodeError here - confirm whether such files can reach this point.
            if diff_item.a_blob is not None:
                original_file_content_str = diff_item.a_blob.data_stream.read().decode('utf-8')
            else:
                original_file_content_str = ""  # empty file
            if diff_item.b_blob is not None:
                new_file_content_str = diff_item.b_blob.data_stream.read().decode('utf-8')
            else:
                new_file_content_str = ""  # empty file
            edit_type = EDIT_TYPE.MODIFIED
            if diff_item.new_file:
                edit_type = EDIT_TYPE.ADDED
            elif diff_item.deleted_file:
                edit_type = EDIT_TYPE.DELETED
            elif diff_item.renamed_file:
                edit_type = EDIT_TYPE.RENAMED
            diff_files.append(
                FilePatchInfo(original_file_content_str,
                              new_file_content_str,
                              diff_item.diff.decode('utf-8'),
                              diff_item.b_path,
                              edit_type=edit_type,
                              old_filename=None if diff_item.a_path == diff_item.b_path else diff_item.a_path
                              )
            )
        self.diff_files = diff_files
        return diff_files

    def get_files(self) -> List[str]:
        """
        Returns a list of files with changes in the diff.
        """
        diff_index = self.repo.head.commit.diff(
            self.repo.merge_base(self.repo.head, self.repo.branches[self.target_branch_name]),
            R=True
        )
        # Get the list of changed files
        diff_files = [item.a_path for item in diff_index]
        return diff_files

    def publish_description(self, pr_title: str, pr_body: str):
        """Write the title and body, separated by a newline, to ``self.description_path``."""
        # Explicit UTF-8: the generated text may contain emoji / non-ASCII characters, and the
        # platform default encoding is not guaranteed to be able to represent them.
        with open(self.description_path, "w", encoding="utf-8") as file:
            # Write the string to the file
            file.write(pr_title + '\n' + pr_body)

    def publish_comment(self, pr_comment: str, is_temporary: bool = False):
        """Write the comment to ``self.review_path``, overwriting it; ``is_temporary`` is not used."""
        # Explicit UTF-8 for the same reason as in publish_description
        with open(self.review_path, "w", encoding="utf-8") as file:
            # Write the string to the file
            file.write(pr_comment)

    def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, original_suggestion=None):
        raise NotImplementedError('Publishing inline comments is not implemented for the local git provider')

    def publish_inline_comments(self, comments: list[dict]):
        raise NotImplementedError('Publishing inline comments is not implemented for the local git provider')

    def publish_code_suggestion(self, body: str, relevant_file: str,
                                relevant_lines_start: int, relevant_lines_end: int):
        raise NotImplementedError('Publishing code suggestions is not implemented for the local git provider')

    def publish_code_suggestions(self, code_suggestions: list) -> bool:
        raise NotImplementedError('Publishing code suggestions is not implemented for the local git provider')

    def publish_labels(self, labels):
        pass  # Not applicable to the local git provider, but required by the interface

    def remove_initial_comment(self):
        pass  # Not applicable to the local git provider, but required by the interface

    def remove_comment(self, comment):
        pass  # Not applicable to the local git provider, but required by the interface

    def add_eyes_reaction(self, comment, disable_eyes: bool = False):
        # Not applicable to the local git provider, but required by the interface.
        # NOTE(review): the optional flag is presumably what other providers accept here -
        # confirm against GitProvider; it is ignored either way.
        pass

    def get_commit_messages(self):
        pass  # Not applicable to the local git provider, but required by the interface

    def get_repo_settings(self):
        pass  # Not applicable to the local git provider, but required by the interface

    def remove_reaction(self, comment, reaction_id=None):
        # Not applicable to the local git provider, but required by the interface.
        # The optional second argument keeps this no-op callable with the
        # (comment id, reaction id) pair that other providers take.
        pass

    def get_languages(self):
        """
        Calculate percentage of languages in repository. Used for hunk prioritisation.

        Returns:
            dict: Maps each lower-cased file extension (without the leading dot; the empty
            string for files without an extension) to its share of all files, in percent.
        """
        # Get all files in repository
        filepaths = [Path(item.path) for item in self.repo.tree().traverse() if item.type == 'blob']
        # Identify language by file extension and count
        lang_count = Counter(ext.lstrip('.') for filepath in filepaths for ext in [filepath.suffix.lower()])
        # Convert counts to percentages
        total_files = len(filepaths)
        lang_percentage = {lang: count / total_files * 100 for lang, count in lang_count.items()}
        return lang_percentage

    def get_pr_branch(self):
        """Return the repository's HEAD reference."""
        return self.repo.head

    def get_user_id(self):
        return -1  # Not used anywhere for the local provider, but required by the interface

    def get_pr_description_full(self):
        """Return the first 200 characters of the space-joined messages of the commits in ``<target>..HEAD``."""
        commits_diff = list(self.repo.iter_commits(self.target_branch_name + '..HEAD'))
        # Get the commit messages and concatenate
        commit_messages = " ".join([commit.message for commit in commits_diff])
        # TODO Handle the description better - maybe use gpt-3.5 summarisation here?
        return commit_messages[:200]  # Use max 200 characters

    def get_pr_title(self):
        """
        Substitutes the branch-name as the PR-mimic title.
        """
        return self.head_branch_name

    def get_issue_comments(self):
        raise NotImplementedError('Getting issue comments is not implemented for the local git provider')

    def get_pr_labels(self, update=False):
        raise NotImplementedError('Getting labels is not implemented for the local git provider')


================================================
FILE: pr_agent/git_providers/utils.py
================================================
import copy
import os
import tempfile
import traceback

from dynaconf import Dynaconf
from starlette_context import context

from pr_agent.config_loader import get_settings
from pr_agent.git_providers import get_git_provider_with_context
from pr_agent.log import get_logger


def apply_repo_settings(pr_url):
    """
    Load the repository's own settings file (if enabled and present) and merge it into the settings.

    The raw settings are taken from the request context when available, otherwise from the
    git provider (and then cached in the context). They are written to a temporary ``.toml``
    file, parsed with Dynaconf, and merged section by section: keys from the repo file
    overwrite existing keys of the same section, other keys of that section are kept.
    Parsing/merging failures are logged and reported through ``handle_configurations_errors``.
    The temporary file is always removed.

    Afterwards, the short model name ``claude-3-5-sonnet`` is expanded via ``set_claude_model``.

    Args:
        pr_url: URL of the pull request whose repository settings should be applied.
    """
    os.environ["AUTO_CAST_FOR_DYNACONF"] = "false"
    git_provider = get_git_provider_with_context(pr_url)
    if get_settings().config.use_repo_settings_file:
        repo_settings_file = None
        try:
            try:
                repo_settings = context.get("repo_settings", None)
            except Exception:
                # The request context may be unavailable (e.g. outside a web request)
                repo_settings = None
            if repo_settings is None:  # None is different from "", which is a valid value
                repo_settings = git_provider.get_repo_settings()
                try:
                    context["repo_settings"] = repo_settings
                except Exception:
                    pass

            error_local = None
            if repo_settings:
                repo_settings_file = None
                category = 'local'
                try:
                    fd, repo_settings_file = tempfile.mkstemp(suffix='.toml')
                    # mkstemp hands back an open OS-level descriptor that the caller must close.
                    # Wrapping it in a file object writes the whole payload and guarantees the
                    # descriptor is closed (previously it leaked on every call).
                    with os.fdopen(fd, 'wb') as tmp_file:
                        tmp_file.write(repo_settings)

                    try:
                        dynconf_kwargs = {'core_loaders': [],  # DISABLE default loaders, otherwise will load toml files more than once.
                             'loaders': ['pr_agent.custom_merge_loader'],
                             # Use a custom loader to merge sections, but overwrite their overlapping values. Don't involve ENV variables.
                             'merge_enabled': True  # Merge multiple files; ensures [XYZ] sections only overwrite overlapping keys, not whole sections.
                         }

                        new_settings = Dynaconf(settings_files=[repo_settings_file],
                                                # Disable all dynamic loading features
                                                load_dotenv=False,  # Don't load .env files
                                                envvar_prefix=False,  # Drop DYNACONF for env. variables
                                                **dynconf_kwargs
                                                )
                    except TypeError as e:
                        # Fallback for older Dynaconf versions that don't support these parameters
                        get_logger().warning(
                            "Your Dynaconf version does not support disabled 'load_dotenv'/'merge_enabled' parameters. "
                            "Loading repo settings without these security features. "
                            "Please upgrade Dynaconf for better security.",
                            artifact={"error": e, "traceback": traceback.format_exc()})
                        new_settings = Dynaconf(settings_files=[repo_settings_file])

                    for section, contents in new_settings.as_dict().items():
                        if not contents:
                            # Skip excluded items, such as forbidden to load env.
                            get_logger().debug(f"Skipping a section: {section} which is not allowed")
                            continue
                        # Start from a copy of the current section and overlay the repo-provided keys
                        section_dict = copy.deepcopy(get_settings().as_dict().get(section, {}))
                        for key, value in contents.items():
                            section_dict[key] = value
                        get_settings().unset(section)
                        get_settings().set(section, section_dict, merge=False)
                    get_logger().info(f"Applying repo settings:\n{new_settings.as_dict()}")
                except Exception as e:
                    get_logger().warning(f"Failed to apply repo {category} settings, error: {str(e)}")
                    error_local = {'error': str(e), 'settings': repo_settings, 'category': category}

                if error_local:
                    handle_configurations_errors([error_local], git_provider)
        except Exception as e:
            get_logger().exception("Failed to apply repo settings", e)
        finally:
            if repo_settings_file:
                try:
                    os.remove(repo_settings_file)
                except Exception as e:
                    get_logger().error(f"Failed to remove temporary settings file {repo_settings_file}", e)

    # enable switching models with a short definition
    if get_settings().config.model.lower() == 'claude-3-5-sonnet':
        set_claude_model()


def handle_configurations_errors(config_errors, git_provider):
    """
    Report configuration-loading errors as a comment on the pull request.

    Args:
        config_errors: Iterable of error records (or falsy placeholders). Each record is a
            mapping with ``settings`` (an object with ``decode()``), ``error`` and ``category``.
        git_provider: Provider used to publish the comment. A persistent comment is used
            when the provider has ``publish_persistent_comment``, a regular one otherwise.

    Any exception raised while reporting is logged and not propagated.
    """
    try:
        if not any(config_errors):
            return

        for err in config_errors:
            if not err:
                continue

            settings_text = err['settings'].decode()
            error_text = err['error']
            category = err['category']
            header = f"❌ **PR-Agent failed to apply '{category}' repo settings**"

            parts = [
                f"{header}\n\nThe configuration file needs to be a valid [TOML](https://qodo-merge-docs.qodo.ai/usage-guide/configuration_options/), please fix it.\n\n",
                f"___\n\n**Error message:**\n`{error_text}`\n\n",
            ]
            # Collapsible section only where the provider supports GitHub-flavoured markdown
            if git_provider.is_supported("gfm_markdown"):
                parts.append(f"\n\n<details><summary>Configuration content:</summary>\n\n```toml\n{settings_text}\n```\n\n</details>")
            else:
                parts.append(f"\n\n**Configuration content:**\n\n```toml\n{settings_text}\n```\n\n")
            body = "".join(parts)

            get_logger().warning("Sending a 'configuration error' comment to the PR", artifact={'body': body})
            # git_provider.publish_comment(body)
            if not hasattr(git_provider, 'publish_persistent_comment'):
                git_provider.publish_comment(body)
            else:
                git_provider.publish_persistent_comment(body,
                                                        initial_header=header,
                                                        update_header=False,
                                                        final_update_message=False)
    except Exception as e:
        get_logger().exception("Failed to handle configurations errors", e)


def set_claude_model():
    """
    Point the main, weak and fallback model settings at the Bedrock Claude 3.5 Sonnet model.

    Lets users select the model with the short form: --config.model='claude-3-5-sonnet'
    """
    model_claude = "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0"
    overrides = (
        ('config.model', model_claude),
        ('config.model_weak', model_claude),
        ('config.fallback_models', [model_claude]),
    )
    for setting_key, setting_value in overrides:
        get_settings().set(setting_key, setting_value)


================================================
FILE: pr_agent/identity_providers/__init__.py
================================================
from pr_agent.config_loader import get_settings
from pr_agent.identity_providers.default_identity_provider import \
    DefaultIdentityProvider

# Registry of identity provider classes, keyed by the identifier that
# get_identity_provider() reads from the CONFIG.IDENTITY_PROVIDER setting.
_IDENTITY_PROVIDERS = {
    'default': DefaultIdentityProvider
}


def get_identity_provider():
    """
    Instantiate the identity provider selected by the ``CONFIG.IDENTITY_PROVIDER`` setting.

    Returns:
        A new instance of the registered provider class ("default" when the setting is absent).

    Raises:
        ValueError: If the configured identifier is not registered.
    """
    identity_provider_id = get_settings().get("CONFIG.IDENTITY_PROVIDER", "default")
    provider_cls = _IDENTITY_PROVIDERS.get(identity_provider_id)
    if provider_cls is None:
        raise ValueError(f"Unknown identity provider: {identity_provider_id}")
    return provider_cls()


================================================
FILE: pr_agent/identity_providers/default_identity_provider.py
================================================
from pr_agent.identity_providers.identity_provider import (Eligibility,
                                                           IdentityProvider)


class DefaultIdentityProvider(IdentityProvider):
    """Identity provider that treats every caller as eligible and keeps no invocation count."""

    def verify_eligibility(self, git_provider, git_provider_id, pr_url):
        """Always report ``Eligibility.ELIGIBLE``; the arguments are not inspected."""
        return Eligibility.ELIGIBLE

    def inc_invocation_count(self, git_provider, git_provider_id):
        """Do nothing: invocations are not counted by this provider."""
        return None


================================================
FILE: pr_agent/identity_providers/identity_provider.py
================================================
from abc import ABC, abstractmethod
from enum import Enum


class Eligibility(Enum):
    """Possible outcomes of an identity provider's eligibility check."""

    NOT_ELIGIBLE = 0
    ELIGIBLE = 1
    TRIAL = 2


class IdentityProvider(ABC):
    """Abstract interface that every identity provider must implement."""

    @abstractmethod
    def verify_eligibility(self, git_provider, git_provider_id, pr_url):
        """Decide the eligibility for the given git provider, provider-side ID and PR URL."""

    @abstractmethod
    def inc_invocation_count(self, git_provider, git_provider_id):
        """Record one invocation for the given git provider and provider-side ID."""


================================================
FILE: pr_agent/log/__init__.py
================================================
import os
os.environ["AUTO_CAST_FOR_DYNACONF"] = "false"
import json
import logging
import sys
from enum import Enum

from loguru import logger

from pr_agent.config_loader import get_settings


class LoggingFormat(str, Enum):
    """Output formats accepted by ``setup_logger``; members compare equal to their string value."""

    CONSOLE = "CONSOLE"
    JSON = "JSON"


def json_format(record: dict) -> str:
    """Return the ``message`` entry of a log record."""
    message = record["message"]
    return message


def analytics_filter(record: dict) -> bool:
    """Return the ``analytics`` flag stored under the record's ``extra`` mapping (False when absent)."""
    extra_fields = record.get("extra", {})
    return extra_fields.get("analytics", False)


def inv_analytics_filter(record: dict) -> bool:
    """Return True for records whose ``extra`` mapping does not carry a truthy ``analytics`` flag."""
    is_analytics = record.get("extra", {}).get("analytics", False)
    return not is_analytics


def setup_logger(level: str = "INFO", fmt: LoggingFormat = LoggingFormat.CONSOLE):
    """
    Configure the shared logger's sinks and return it.

    Args:
        level: Logging level name (case-insensitive). Names that do not resolve to a
            numeric level fall back to ``logging.INFO``.
        fmt: ``LoggingFormat.JSON`` installs a serialized stdout sink, but only while the
            ``LOG_SANE`` environment variable is "0" or unset; ``LoggingFormat.CONSOLE``
            installs a colorized stdout sink. Both skip records flagged as analytics.

    When the ``CONFIG.ANALYTICS_FOLDER`` setting is non-empty, a serialized file sink named
    ``pr-agent.<pid>.log`` is added in that folder for analytics records only.

    Returns:
        The configured logger object.
    """
    numeric_level = logging.getLevelName(level.upper())
    if type(numeric_level) is not int:
        numeric_level = logging.INFO

    # Options shared by the serialized (JSON) sinks
    serialized_sink_options = dict(level=numeric_level, format="{message}", colorize=False, serialize=True)

    use_json_stdout = fmt == LoggingFormat.JSON and os.getenv("LOG_SANE", "0").lower() == "0"
    if use_json_stdout:  # better debugging github_app
        logger.remove(None)
        logger.add(sys.stdout, filter=inv_analytics_filter, **serialized_sink_options)
    elif fmt == LoggingFormat.CONSOLE:  # does not print the 'extra' fields
        logger.remove(None)
        logger.add(sys.stdout, level=numeric_level, colorize=True, filter=inv_analytics_filter)

    analytics_folder = get_settings().get("CONFIG.ANALYTICS_FOLDER", "")
    if analytics_folder:
        analytics_file = os.path.join(analytics_folder, f"pr-agent.{os.getpid()}.log")
        logger.add(analytics_file, filter=analytics_filter, **serialized_sink_options)

    return logger


def get_logger(*args, **kwargs):
    """Return the shared module-level logger; any arguments are accepted and ignored."""
    shared_logger = logger
    return shared_logger


================================================
FILE: pr_agent/secret_providers/__init__.py
================================================
from pr_agent.config_loader import get_settings


def get_secret_provider():
    """
    Build the secret provider named by the ``CONFIG.SECRET_PROVIDER`` setting.

    The provider modules are imported lazily, only for the provider that is selected.

    Returns:
        A provider instance, or None when no secret provider is configured.

    Raises:
        ValueError: If the configured provider is unknown, or if importing or constructing
            the selected provider fails (the original error is chained).
    """
    if not get_settings().get("CONFIG.SECRET_PROVIDER"):
        return None

    provider_id = get_settings().config.secret_provider

    if provider_id == 'google_cloud_storage':
        try:
            from pr_agent.secret_providers.google_cloud_storage_secret_provider import \
                GoogleCloudStorageSecretProvider
            return GoogleCloudStorageSecretProvider()
        except Exception as e:
            raise ValueError(f"Failed to initialize google_cloud_storage secret provider {provider_id}") from e

    if provider_id == 'aws_secrets_manager':
        try:
            from pr_agent.secret_providers.aws_secrets_manager_provider import \
                AWSSecretsManagerProvider
            return AWSSecretsManagerProvider()
        except Exception as e:
            raise ValueError(f"Failed to initialize aws_secrets_manager secret provider {provider_id}") from e

    raise ValueError("Unknown SECRET_PROVIDER")


================================================
FILE: pr_agent/secret_providers/aws_secrets_manager_provider.py
================================================
import json
import boto3
from botocore.exceptions import ClientError

from pr_agent.config_loader import get_settings
from pr_agent.log import get_logger
from pr_agent.secret_providers.secret_provider import SecretProvider


class AWSSecretsManagerProvider(SecretProvider):
    """Secret provider that reads and writes secrets through a boto3 ``secretsmanager`` client."""

    def __init__(self):
        """
        Create the client and read the configured secret ARN.

        The region comes from ``aws_secrets_manager.region_name`` or, failing that,
        ``aws.AWS_REGION_NAME``; when neither is set the client is created without one.

        Raises:
            ValueError: If ``aws_secrets_manager.secret_arn`` is not configured.
            Exception: Any other initialization error is logged and re-raised.
        """
        try:
            settings = get_settings()
            region_name = settings.get("aws_secrets_manager.region_name") or \
                get_settings().get("aws.AWS_REGION_NAME")
            client_options = {'region_name': region_name} if region_name else {}
            self.client = boto3.client('secretsmanager', **client_options)

            self.secret_arn = get_settings().get("aws_secrets_manager.secret_arn")
            if not self.secret_arn:
                raise ValueError("AWS Secrets Manager ARN is not configured")
        except Exception as e:
            get_logger().error(f"Failed to initialize AWS Secrets Manager Provider: {e}")
            raise e

    def get_secret(self, secret_name: str) -> str:
        """
        Retrieve individual secret by name (for webhook tokens)

        Returns the ``SecretString`` of the secret, or an empty string (after logging a
        warning) when the lookup fails.
        """
        try:
            secret_value = self.client.get_secret_value(SecretId=secret_name)
            return secret_value['SecretString']
        except Exception as e:
            get_logger().warning(f"Failed to get secret {secret_name} from AWS Secrets Manager: {e}")
            return ""

    def get_all_secrets(self) -> dict:
        """
        Retrieve all secrets for configuration override

        Reads the secret identified by the configured ARN and parses its ``SecretString``
        as JSON. Returns an empty dict (after logging an error) on failure.
        """
        try:
            secret_value = self.client.get_secret_value(SecretId=self.secret_arn)
            return json.loads(secret_value['SecretString'])
        except Exception as e:
            get_logger().error(f"Failed to get secrets from AWS Secrets Manager {self.secret_arn}: {e}")
            return {}

    def store_secret(self, secret_name: str, secret_value: str):
        """Store ``secret_value`` under ``secret_name``; failures are logged and re-raised."""
        try:
            self.client.put_secret_value(SecretId=secret_name, SecretString=secret_value)
        except Exception as e:
            get_logger().error(f"Failed to store secret {secret_name} in AWS Secrets Manager: {e}")
            raise e


================================================
FILE: pr_agent/secret_providers/google_cloud_storage_secret_provider.py
================================================
import ujson
from google.cloud import storage

from pr_agent.config_loader import get_settings
from pr_agent.log import get_logger
from pr_agent.secret_providers.secret_provider import SecretProvider


class GoogleCloudStorageSecretProvider(SecretProvider):
    """Secret provider that keeps each secret as a blob in a Google Cloud Storage bucket."""

    def __init__(self):
        """
        Create the storage client from the configured service account JSON and open the bucket.

        Raises:
            Exception: Any initialization error is logged and re-raised.
        """
        try:
            service_account_info = ujson.loads(get_settings().google_cloud_storage.service_account)
            self.client = storage.Client.from_service_account_info(service_account_info)
            self.bucket_name = get_settings().google_cloud_storage.bucket_name
            self.bucket = self.client.bucket(self.bucket_name)
        except Exception as e:
            get_logger().error(f"Failed to initialize Google Cloud Storage Secret Provider: {e}")
            raise e

    def get_secret(self, secret_name: str) -> str:
        """
        Download the blob named ``secret_name`` and return its content.

        Returns an empty string (after logging a warning) when the download fails.
        NOTE(review): the downloaded value is presumably bytes rather than str - confirm.
        """
        try:
            secret_blob = self.bucket.blob(secret_name)
            return secret_blob.download_as_string()
        except Exception as e:
            get_logger().warning(f"Failed to get secret {secret_name} from Google Cloud Storage: {e}")
            return ""

    def store_secret(self, secret_name: str, secret_value: str):
        """Upload ``secret_value`` to the blob named ``secret_name``; failures are logged and re-raised."""
        try:
            self.bucket.blob(secret_name).upload_from_string(secret_value)
        except Exception as e:
            get_logger().error(f"Failed to store secret {secret_name} in Google Cloud Storage: {e}")
            raise e


================================================
FILE: pr_agent/secret_providers/secret_provider.py
================================================
from abc import ABC, abstractmethod


class SecretProvider(ABC):
    """Abstract interface for backends that can fetch and store named secrets."""

    @abstractmethod
    def get_secret(self, secret_name: str) -> str:
        """Return the secret stored under ``secret_name``."""

    @abstractmethod
    def store_secret(self, secret_name: str, secret_value: str):
        """Store ``secret_value`` under ``secret_name``."""


================================================
FILE: pr_agent/servers/__init__.py
================================================


================================================
FILE: pr_agent/servers/atlassian-connect-qodo-merge.json
================================================
{
  "name": "Qodo Merge",
  "description": "Qodo Merge",
  "key": "app_key",
  "vendor": {
    "name": "Qodo",
    "url": "https://qodo.ai"
  },
  "authentication": {
    "type": "jwt"
  },
  "baseUrl": "base_url",
  "lifecycle": {
    "installed": "/installed",
    "uninstalled": "/uninstalled"
  },
  "scopes": [
    "account",
    "repository:write",
    "pullrequest:write",
    "wiki"
  ],
  "contexts": [
    "account"
  ],
  "modules": {
    "webhooks": [
      {
        "event": "*",
        "url": "/webhook"
      }
    ]
  },
  "links": {
    "privacy": "https://qodo.ai/privacy-policy",
    "terms": "https://qodo.ai/terms"
  }
}


================================================
FILE: pr_agent/servers/atlassian-connect.json
================================================
{
  "name": "CodiumAI PR-Agent",
  "description": "CodiumAI PR-Agent",
  "key": "app_key",
  "vendor": {
    "name": "CodiumAI",
    "url": "https://codium.ai"
  },
  "authentication": {
    "type": "jwt"
  },
  "baseUrl": "base_url",
  "lifecycle": {
    "installed": "/installed",
    "uninstalled": "/uninstalled"
  },
  "scopes": [
    "account",
    "repository:write",
    "pullrequest:write",
    "wiki"
  ],
  "contexts": [
    "account"
  ],
  "modules": {
    "webhooks": [
      {
        "event": "*",
        "url": "/webhook"
      }
    ]
  },
  "links": {
    "privacy": "https://qodo.ai/privacy-policy",
    "terms": "https://qodo.ai/terms"
  }
}


================================================
FILE: pr_agent/servers/azuredevops_server_webhook.py
================================================
# This file contains the code for the Azure DevOps Server webhook server.
# The server listens for incoming webhooks from Azure DevOps Server and forwards them to the PR Agent.
# ADO webhook documentation: https://learn.microsoft.com/en-us/azure/devops/service-hooks/services/webhooks?view=azure-devops

import json
import os
import re
import secrets
from urllib.parse import unquote

import uvicorn
from fastapi import APIRouter, Depends, FastAPI, HTTPException, Request
from fastapi.encoders import jsonable_encoder
from fastapi.security import HTTPBasic, HTTPBasicCredentials
from starlette import status
from starlette.background import BackgroundTasks
from starlette.middleware import Middleware
from starlette.requests import Request
from starlette.responses import JSONResponse
from starlette_context.middleware import RawContextMiddleware

from pr_agent.agent.pr_agent import PRAgent, command2class
from pr_agent.algo.utils import update_settings_from_args
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import get_git_provider_with_context
from pr_agent.git_providers.azuredevops_provider import AzureDevopsProvider
from pr_agent.git_providers.utils import apply_repo_settings
from pr_agent.log import LoggingFormat, get_logger, setup_logger

# Emit JSON-formatted logs; the level comes from CONFIG.LOG_LEVEL and falls back to "DEBUG".
setup_logger(fmt=LoggingFormat.JSON, level=get_settings().get("CONFIG.LOG_LEVEL", "DEBUG"))
# HTTP Basic scheme consumed by `authorize`.
# NOTE(review): auto_error=False is documented by FastAPI to hand the dependency `None` instead of
# rejecting the request when no credentials are sent - confirm `authorize` copes with that.
security = HTTPBasic(auto_error=False)
router = APIRouter()
# Matches text that starts with "/<command>" (any key of command2class) followed by optional whitespace.
available_commands_rgx = re.compile(r"^\/(" + "|".join(command2class.keys()) + r")\s*")
# Settings section for this server.
# NOTE(review): if the section is absent this is presumably None and the `.get` calls below would
# fail at import time - confirm the default configuration always defines it.
azure_devops_server = get_settings().get("azure_devops_server")
# Optional webhook credentials; `authorize` skips the check when either one is None.
WEBHOOK_USERNAME = azure_devops_server.get("webhook_username", None)
WEBHOOK_PASSWORD = azure_devops_server.get("webhook_password", None)

async def handle_request_comment(url: str, body: str, thread_id: int, comment_id: int, log_context: dict):
    """
    Run the command contained in a PR comment and tidy up the comment thread afterwards.

    Args:
        url: URL of the pull request the comment belongs to.
        body: Raw comment text (the command). It is passed through `handle_line_comment` before being run.
        thread_id: Id of the comment thread; used for the acknowledgement reply and for closing the thread.
        comment_id: Id of the comment. Accepted for the caller's convenience; not used by this function.
        log_context: Mutable logging context; "action" and "api_url" are written into it.

    Any exception is logged and swallowed, so the coroutine itself never raises.
    """
    log_context.update(action=body, api_url=url)
    try:
        with get_logger().contextualize(**log_context):
            agent = PRAgent()
            provider = get_git_provider_with_context(pr_url=url)

            def _acknowledge():
                # Posted by the agent (through `notify`) to tell the user the command was picked up.
                return provider.reply_to_thread(thread_id, "On it! ⏳", True)

            body = handle_line_comment(body, thread_id, provider)
            was_handled = await agent.handle_request(url, body, notify=_acknowledge)
            if not was_handled:
                return
            # The command ran: close the thread that carried it and drop the acknowledgement comment.
            provider.set_thread_status(thread_id, "closed")
            provider.remove_initial_comment()
    except Exception as e:
        get_logger().exception("Failed to handle webhook", artifact={"url": url, "body": body}, error=str(e))

def handle_line_comment(body: str, thread_id: int, provider: AzureDevopsProvider):
    """
    Rewrite an inline `/ask <question>` comment into an `/ask_line` command.

    Args:
        body: Raw comment text.
        thread_id: Id of the comment thread the comment was posted in.
        provider: Git provider used to look up the thread context (file path and line positions).

    Returns:
        The stripped `body` unchanged when it is not an `/ask ` command, when the thread has no
        context, or when the context carries no line positions. Otherwise an
        `/ask_line --line_start=.. --line_end=.. --side=.. --file_name=.. --comment_id=.. <question>` string.
    """
    ask_prefix = '/ask '
    body = body.strip()
    if not body.startswith(ask_prefix):
        return body
    thread_context = provider.get_thread_context(thread_id)
    if not thread_context:
        return body

    path = thread_context.file_path
    left_start, left_end = thread_context.left_file_start, thread_context.left_file_end
    right_start, right_end = thread_context.right_file_start, thread_context.right_file_end
    if left_start or left_end:
        # Only one of the two positions may be set; fall back to the other one so that we never
        # dereference None (the previous code read `.line` on both unconditionally).
        start_pos, end_pos, side = left_start or left_end, left_end or left_start, "left"
    elif right_start or right_end:
        start_pos, end_pos, side = right_start or right_end, right_end or right_start, "right"
    else:
        get_logger().info("No line range found in thread context", artifact={"thread_context": thread_context})
        return body

    question = body[len(ask_prefix):].lstrip()  # drop the leading '/ask ' (5 characters)
    return (f"/ask_line --line_start={start_pos.line} --line_end={end_pos.line} --side={side} "
            f"--file_name={path} --comment_id={thread_id} {question}")

# currently only basic auth is supported with azure webhooks
# for this reason, https must be enabled to ensure the credentials are not sent in clear text
def authorize(credentials: HTTPBasicCredentials = Depends(security)):
    """
    FastAPI dependency that enforces HTTP Basic authentication on the webhook endpoint.

    The check is skipped entirely when WEBHOOK_USERNAME or WEBHOOK_PASSWORD is not configured.

    Args:
        credentials: Credentials parsed by the `security` scheme. Because that scheme is created
            with `auto_error=False`, this is None when the request carries no usable
            Authorization header.

    Raises:
        HTTPException: 401 with a `WWW-Authenticate: Basic` header when the credentials are
            missing or do not match the configured username/password.
    """
    if WEBHOOK_USERNAME is None or WEBHOOK_PASSWORD is None:
        return

    is_user_ok = is_pass_ok = False
    if credentials is not None:
        # compare_digest() only accepts ASCII-only str or bytes; comparing UTF-8 bytes keeps the
        # constant-time comparison while not blowing up on non-ASCII input.
        is_user_ok = secrets.compare_digest(
            credentials.username.encode("utf-8"), str(WEBHOOK_USERNAME).encode("utf-8")
        )
        is_pass_ok = secrets.compare_digest(
            credentials.password.encode("utf-8"), str(WEBHOOK_PASSWORD).encode("utf-8")
        )
    if not (is_user_ok and is_pass_ok):
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail='Incorrect username or password.',
            headers={'WWW-Authenticate': 'Basic'},
        )


async def _perform_commands_azure(commands_conf: str, agent: PRAgent, api_url: str, log_context: dict):
    """
    Run the commands configured under `azure_devops_server.<commands_conf>` against a PR.

    Args:
        commands_conf: Name of the settings key holding the command list (e.g. "pr_commands").
        agent: Agent used to execute each command.
        api_url: URL of the pull request.
        log_context: Logging context applied while each command runs.

    Repository settings are applied first. Nothing runs when auto feedback is disabled for
    "pr_commands" or when no commands are configured. A failing command is logged and does not
    stop the remaining ones.
    """
    apply_repo_settings(api_url)

    auto_feedback_off = commands_conf == "pr_commands" and get_settings().config.disable_auto_feedback
    if auto_feedback_off:  # auto commands for PR, and auto feedback is disabled
        get_logger().info(f"Auto feedback is disabled, skipping auto commands for PR {api_url=}", **log_context)
        return

    commands = get_settings().get(f"azure_devops_server.{commands_conf}")
    if not commands:
        return

    get_settings().set("config.is_auto_command", True)
    for command in commands:
        try:
            # First token is the command itself, the rest are its arguments.
            command, *args = command.split(" ")
            # Arguments that are settings overrides are consumed here; the remainder is passed on.
            other_args = update_settings_from_args(args)
            new_command = ' '.join([command] + other_args)
            get_logger().info(f"Performing command: {new_command}")
            with get_logger().contextualize(**log_context):
                await agent.handle_request(api_url, new_command)
        except Exception as e:
            get_logger().error(f"Failed to perform command {command}: {e}")


async def handle_request_azure(data, log_context):
    """
    Dispatch one Azure DevOps service-hook payload.

    * "git.pullrequest.created": runs the configured "pr_commands" on the new PR.
    * "ms.vss-code.git-pullrequest-comment-event" whose comment has "content": when the content
      starts with a known command and the payload is resource version "2.0", the command is
      executed through `handle_request_comment`.
    * Anything else is reported as unsupported.

    Args:
        data: Parsed webhook JSON payload.
        log_context: Mutable logging context; "event" and "api_url" are written into it.

    Returns:
        A JSONResponse describing the outcome (202 on success, 400 for an unsupported
        command/version, 204 for an unsupported event, 500 on failure).
    """
    event_type = data["eventType"]

    if event_type == "git.pullrequest.created":
        # API V1 (latest)
        pr_url = unquote(data["resource"]["_links"]["web"]["href"].replace("_apis/git/repositories", "_git"))
        log_context["event"] = event_type
        log_context["api_url"] = pr_url
        await _perform_commands_azure("pr_commands", PRAgent(), pr_url, log_context)
        return JSONResponse(
            status_code=status.HTTP_202_ACCEPTED,
            content=jsonable_encoder({"message": "webhook triggered successfully"})
        )

    if event_type != "ms.vss-code.git-pullrequest-comment-event" or "content" not in data["resource"]["comment"]:
        # NOTE(review): a 204 response is not supposed to carry a body; kept as-is because the
        # visible caller runs this function as a background task and ignores the return value.
        return JSONResponse(
            status_code=status.HTTP_204_NO_CONTENT,
            content={"message": "Unsupported event"},
        )

    comment = data["resource"]["comment"]
    if not available_commands_rgx.match(comment["content"]):
        # JSONResponse serializes `content` itself; passing json.dumps(...) would double-encode it.
        return JSONResponse(
            status_code=status.HTTP_400_BAD_REQUEST,
            content={"message": "Unsupported command"},
        )
    if data["resourceVersion"] != "2.0":
        # API V1 not supported as it does not contain the PR URL
        return JSONResponse(
            status_code=status.HTTP_400_BAD_REQUEST,
            content={"message": "version 1.0 webhook for Azure Devops PR comment is not supported. please upgrade to version 2.0"},
        )

    pull_request = data["resource"]["pullRequest"]
    repo = pull_request["repository"]["webUrl"]
    pr_url = unquote(f'{repo}/pullrequest/{pull_request["pullRequestId"]}')
    action = comment["content"]
    # The thread id is the last path segment of the comment's "threads" link.
    thread_url = comment["_links"]["threads"]["href"]
    thread_id = int(thread_url.split("/")[-1])
    comment_id = int(comment["id"])

    log_context["event"] = event_type
    log_context["api_url"] = pr_url

    try:
        await handle_request_comment(pr_url, action, thread_id, comment_id, log_context)
    except Exception as e:
        get_logger().error("Azure DevOps Trigger failed. Error:" + str(e))
        return JSONResponse(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            content={"message": "Internal server error"},
        )
    return JSONResponse(
        status_code=status.HTTP_202_ACCEPTED, content=jsonable_encoder({"message": "webhook triggered successfully"})
    )

@router.post("/", dependencies=[Depends(authorize)])
async def handle_webhook(background_tasks: BackgroundTasks, request: Request):
    """
    Webhook entry point: queue the payload for background processing and acknowledge at once.

    The request body is parsed as JSON and handed to `handle_request_azure` as a background
    task, so the 202 response does not reflect the outcome of the processing.
    """
    log_context = {"server_type": "azure_devops_server"}
    payload = await request.json()
    # get_logger().info(json.dumps(data))

    background_tasks.add_task(handle_request_azure, payload, log_context)

    acknowledgement = jsonable_encoder({"message": "webhook triggered successfully"})
    return JSONResponse(status_code=status.HTTP_202_ACCEPTED, content=acknowledgement)

@router.get("/")
async def root():
    """Answer GET / with a static status payload."""
    status_payload = {"status": "ok"}
    return status_payload

def start():
    """Build the FastAPI app around `router` and serve it with uvicorn on $PORT (default 3000)."""
    context_middleware = [Middleware(RawContextMiddleware)]
    app = FastAPI(middleware=context_middleware)
    app.include_router(router)
    listen_port = int(os.environ.get("PORT", "3000"))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)

if __name__ == "__main__":
    start()


================================================
FILE: pr_agent/servers/bitbucket_app.py
================================================
import base64
import copy
import hashlib
import json
import os
import re
import time

import jwt
import requests
import uvicorn
from fastapi import APIRouter, FastAPI, Request, Response
from starlette.background import BackgroundTasks
from starlette.middleware import Middleware
from starlette.responses import JSONResponse
from starlette_context import context
from starlette_context.middleware import RawContextMiddleware

from pr_agent.agent.pr_agent import PRAgent
from pr_agent.algo.utils import update_settings_from_args
from pr_agent.config_loader import get_settings, global_settings
from pr_agent.git_providers.utils import apply_repo_settings
from pr_agent.identity_providers import get_identity_provider
from pr_agent.identity_providers.identity_provider import Eligibility
from pr_agent.log import LoggingFormat, get_logger, setup_logger
from pr_agent.secret_providers import get_secret_provider

# Emit JSON-formatted logs; the level comes from CONFIG.LOG_LEVEL and falls back to "DEBUG".
setup_logger(fmt=LoggingFormat.JSON, level=get_settings().get("CONFIG.LOG_LEVEL", "DEBUG"))
router = APIRouter()
# A secret provider is only created when CONFIG.SECRET_PROVIDER is set; otherwise this stays None.
# NOTE(review): handlers in this module call secret_provider.get_secret / store_secret without a
# None check - confirm a provider is always configured when this server is deployed.
secret_provider = get_secret_provider() if get_settings().get("CONFIG.SECRET_PROVIDER") else None


async def get_bearer_token(shared_secret: str, client_key: str, timeout: float = 30):
    """
    Exchange a self-signed JWT for a Bitbucket OAuth2 access token.

    Args:
        shared_secret: Secret used to sign the JWT (HS256).
        client_key: Placed in the JWT "sub" claim.
        timeout: Seconds to wait for the token endpoint before giving up.

    Returns:
        The "access_token" value from the endpoint's JSON response.

    Raises:
        Exception: Any failure (network error, non-2xx status, missing field) is logged and
            re-raised to the caller.
    """
    try:
        now = int(time.time())
        url = "https://bitbucket.org/site/oauth2/access_token"
        # NOTE(review): the query-string hash is computed for "GET" while the request below is a
        # POST - left untouched; confirm against what the token endpoint expects.
        canonical_url = "GET&/site/oauth2/access_token&"
        qsh = hashlib.sha256(canonical_url.encode("utf-8")).hexdigest()
        app_key = get_settings().bitbucket.app_key

        claims = {
            "iss": app_key,
            "iat": now,
            "exp": now + 240,  # token is valid for four minutes
            "qsh": qsh,
            "sub": client_key,
        }
        token = jwt.encode(claims, shared_secret, algorithm="HS256")
        form_body = 'grant_type=urn%3Abitbucket%3Aoauth2%3Ajwt'
        headers = {
            'Authorization': f'JWT {token}',
            'Content-Type': 'application/x-www-form-urlencoded'
        }
        # NOTE(review): this is a blocking call inside a coroutine; the timeout at least bounds
        # how long it can stall.
        response = requests.request("POST", url, headers=headers, data=form_body, timeout=timeout)
        # Surface HTTP errors as such instead of a confusing KeyError on "access_token".
        response.raise_for_status()
        return response.json()["access_token"]
    except Exception as e:
        get_logger().error(f"Failed to get bearer token: {e}")
        raise

@router.get("/")
async def handle_manifest(request: Request, response: Response):
    """
    Serve the Atlassian Connect manifest.

    Reads `atlassian-connect.json` from this module's directory, substitutes the "app_key" and
    "base_url" placeholders with the values from the `bitbucket` settings, and returns the
    result as JSON. If the substitution fails the error is logged and the manifest is returned
    with whatever replacements were already made.
    """
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    # Use a context manager so the file handle is always closed.
    with open(os.path.join(cur_dir, "atlassian-connect.json"), "rt") as manifest_file:
        manifest = manifest_file.read()
    try:
        manifest = manifest.replace("app_key", get_settings().bitbucket.app_key)
        manifest = manifest.replace("base_url", get_settings().bitbucket.base_url)
    except Exception as e:
        get_logger().error(f"Failed to replace app_key/base_url in Bitbucket manifest, trying to continue: {e}")
    manifest_obj = json.loads(manifest)
    return JSONResponse(manifest_obj)


def _get_username(data):
    actor = data.get("data", {}).get("actor", {})
    if actor:
        if "username" in actor:
            return actor["username"]
        elif "display_name" in actor:
            return actor["display_name"]
        elif "nickname" in actor:
            return actor["nickname"]
    return ""


async def _validate_time_from_last_commit_to_pr_update(data: dict) -> bool:
    """
    Decide whether a PR-update webhook looks like it was caused by a fresh push from the actor.

    The first entry returned by the PR's commits API is compared with the webhook payload: its
    author's display name must equal the webhook actor's name, and the PR's `updated_on` must be
    later than that commit's date by less than 15 seconds.

    Args:
        data: Webhook payload; the PR is read from `data["data"]["pullrequest"]`.

    Returns:
        True when the update passes the checks above (and also when the payload has no "data"
        section - see the note below); False otherwise, including on any exception.
    """
    is_valid_push = False
    try:
        data_inner = data.get('data', {})
        if not data_inner:
            get_logger().error("No data found in the webhook payload")
            # NOTE(review): every other failure path returns False; returning True here treats an
            # empty payload as a valid push - confirm this is intended.
            return True
        pull_request = data_inner.get('pullrequest', {})
        commits_api = pull_request.get('links', {}).get('commits', {}).get('href')
        if not commits_api:
            return False
        if not pull_request.get('updated_on'):
            return False
        # Token stored in the request context by the webhook handler.
        bearer_token = context.get('bitbucket_bearer_token')
        headers = {
            'Authorization': f'Bearer {bearer_token}',
            'Accept': 'application/json'
        }
        # NOTE(review): blocking HTTP call without a timeout inside a coroutine.
        response = requests.get(commits_api, headers=headers)
        if response.status_code != 200:
            get_logger().warning(f"Bitbucket commits API returned {response.status_code} for {commits_api}")
            return False

        username =_get_username(data)
        commits_data = response.json() or {}
        values = commits_data.get('values') or []
        # Require at least one commit whose author.user.display_name is present.
        if (not values or not isinstance(values, list) or not values[0].get('author') or not values[0]['author'].get('user')
                or not values[0]['author']['user'].get('display_name')):
            get_logger().warning("No commits returned for pull request or one of the required fields missing; skipping push validation",
                                 artifact={'values': values})
            return False
        # NOTE(review): presumably values[0] is the most recent commit - confirm the API's ordering.
        commit_username = commits_data['values'][0]['author']['user']['display_name']
        # NOTE(review): `username` comes from _get_username, which may yield the actor's "username"
        # rather than the display name, while the commit side is always a display name - verify
        # the two are comparable.
        if username != commit_username:
            get_logger().warning(f"Mismatch in username {username} vs. commit_username {commit_username}")
            return False

        time_pr_updated = pull_request['updated_on']
        time_last_commit = commits_data['values'][0]['date']
        from datetime import datetime
        ts1 = datetime.fromisoformat(time_pr_updated)
        ts2 = datetime.fromisoformat(time_last_commit)
        diff = (ts1 - ts2).total_seconds()
        # Accept only updates that happened strictly after the commit and within this window.
        max_delta_seconds = 15
        if diff > 0 and diff < max_delta_seconds:
            is_valid_push = True
        else:
            # Also reached when diff <= 0, i.e. the PR update is not later than the commit.
            get_logger().debug(f"Too much time passed since last commit",
                               artifact={'updated': time_pr_updated, 'last_commit': time_last_commit})
    except Exception as e:
        get_logger().exception(f"Failed to validate time difference between last commit and PR update",
                               artifact={'error': e, 'data': data})
    return is_valid_push

async def _perform_commands_bitbucket(commands_conf: str, agent: PRAgent, api_url: str, log_context: dict, data: dict):
    """
    Run the commands configured under `bitbucket_app.<commands_conf>` against a PR.

    Args:
        commands_conf: Settings key holding the command list ("pr_commands" or "push_commands").
        agent: Agent used to execute each command.
        api_url: URL of the pull request.
        log_context: Logging context applied while each command runs.
        data: Webhook payload, used for the PR filters and the push validation.

    Repository settings are applied first. The run is skipped when auto feedback is disabled
    (for "pr_commands"), when push handling is disabled or the update does not validate as a
    push (for "push_commands"), or when a newly created PR is filtered out by configuration.
    A failing command is logged and does not stop the remaining ones.
    """
    is_push = commands_conf == "push_commands"

    apply_repo_settings(api_url)
    if commands_conf == "pr_commands" and get_settings().config.disable_auto_feedback:  # auto commands for PR, and auto feedback is disabled
        get_logger().info(f"Auto feedback is disabled, skipping auto commands for PR {api_url=}")
        return
    if is_push and not get_settings().get("bitbucket_app.handle_push_trigger"):
        get_logger().info(
            "Bitbucket push trigger handling disabled via config; skipping push commands")
        return
    if data.get("event", "") == "pullrequest:created" and not should_process_pr_logic(data):
        return

    commands = get_settings().get(f"bitbucket_app.{commands_conf}", {})
    get_settings().set("config.is_auto_command", True)
    if is_push:
        # A 'pullrequest:updated' event is only treated as a push when the timing check passes.
        push_confirmed = await _validate_time_from_last_commit_to_pr_update(data)
        if not push_confirmed:
            get_logger().info("Bitbucket skipping 'pullrequest:updated' for push commands")
            return

    for command in commands:
        try:
            # First token is the command itself, the rest are its arguments.
            command, *args = command.split(" ")
            other_args = update_settings_from_args(args)
            new_command = ' '.join([command] + other_args)
            get_logger().info(f"Performing command: {new_command}")
            with get_logger().contextualize(**log_context):
                await agent.handle_request(api_url, new_command)
        except Exception as e:
            get_logger().error(f"Failed to perform command {command}: {e}")


def is_bot_user(data) -> bool:
    """
    Tell whether the webhook actor should be treated as a bot.

    Returns True only when `data["data"]["actor"]` is present and its "type" (case-insensitive)
    is something other than "user". A missing actor, a "user" actor, or any error while
    inspecting the payload yields False.
    """
    # allow actor type: user . if it's "AppUser" or "team" then it is a bot user
    human_actor_types = {"user"}
    try:
        actor = data.get("data", {}).get("actor", {})
        if not actor:
            return False
        if actor["type"].lower() in human_actor_types:
            return False
        get_logger().info(f"BitBucket actor type is not 'user', skipping: {actor}")
        return True
    except Exception as e:
        get_logger().error(f"Failed 'is_bot_user' logic: {e}")
    return False


def should_process_pr_logic(data) -> bool:
    """
    Apply the configured ignore filters to a Bitbucket pull-request webhook payload.

    Checks, in order: CONFIG.IGNORE_REPOSITORIES against the destination repository full name,
    CONFIG.IGNORE_PR_AUTHORS against the actor name, CONFIG.IGNORE_PR_TITLE against the title,
    and CONFIG.IGNORE_PR_SOURCE_BRANCHES / CONFIG.IGNORE_PR_TARGET_BRANCHES against the branch
    names. All patterns are applied with `re.search`.

    Returns:
        False when any filter matches; True otherwise, including when an exception occurs
        while evaluating the filters (the error is logged).
    """
    def _matches_any(patterns, text):
        return any(re.search(pattern, text) for pattern in patterns)

    try:
        pr_data = data.get("data", {}).get("pullrequest", {})
        destination = pr_data.get("destination", {})
        title = pr_data.get("title", "")
        source_branch = pr_data.get("source", {}).get("branch", {}).get("name", "")
        target_branch = destination.get("branch", {}).get("name", "")
        sender = _get_username(data)
        repo_full_name = destination.get("repository", {}).get("full_name", "")

        # logic to ignore PRs from specific repositories
        ignore_repos = get_settings().get("CONFIG.IGNORE_REPOSITORIES", [])
        if repo_full_name and ignore_repos and _matches_any(ignore_repos, repo_full_name):
            get_logger().info(f"Ignoring PR from repository '{repo_full_name}' due to 'config.ignore_repositories' setting")
            return False

        # logic to ignore PRs from specific users
        ignore_pr_users = get_settings().get("CONFIG.IGNORE_PR_AUTHORS", [])
        if ignore_pr_users and sender and _matches_any(ignore_pr_users, sender):
            get_logger().info(f"Ignoring PR from user '{sender}' due to 'config.ignore_pr_authors' setting")
            return False

        # logic to ignore PRs with specific titles
        if title:
            title_patterns = get_settings().get("CONFIG.IGNORE_PR_TITLE", [])
            # A single pattern may be configured as a bare value instead of a list.
            if not isinstance(title_patterns, list):
                title_patterns = [title_patterns]
            if title_patterns and _matches_any(title_patterns, title):
                get_logger().info(f"Ignoring PR with title '{title}' due to config.ignore_pr_title setting")
                return False

        # logic to ignore PRs by source / target branch
        source_patterns = get_settings().get("CONFIG.IGNORE_PR_SOURCE_BRANCHES", [])
        target_patterns = get_settings().get("CONFIG.IGNORE_PR_TARGET_BRANCHES", [])
        if source_patterns or target_patterns:
            if _matches_any(source_patterns, source_branch):
                get_logger().info(
                    f"Ignoring PR with source branch '{source_branch}' due to config.ignore_pr_source_branches settings")
                return False
            if _matches_any(target_patterns, target_branch):
                get_logger().info(
                    f"Ignoring PR with target branch '{target_branch}' due to config.ignore_pr_target_branches settings")
                return False
    except Exception as e:
        get_logger().error(f"Failed 'should_process_pr_logic': {e}")
    return True


@router.post("/webhook")
async def handle_github_webhooks(background_tasks: BackgroundTasks, request: Request):
    """
    Bitbucket app webhook entry point.

    The request is acknowledged with "OK" immediately; the actual work runs as a background
    task which:
      1. drops events from bot actors and newly created PRs filtered out by configuration,
      2. reads the JWT "iss" claim (unverified) to look up the stored shared secret, then
         verifies the JWT with it (HS256, audience = that same "iss" value),
      3. obtains a bearer token, stores it and a private copy of the global settings in the
         request context,
      4. dispatches on the event: "pullrequest:created" -> pr_commands,
         "pullrequest:updated" -> push_commands, "pullrequest:comment_created" -> the comment
         text is run as a command. Each is gated by the identity provider's eligibility check.

    Every exception inside the background task is logged and swallowed.
    """
    app_name = get_settings().get("CONFIG.APP_NAME", "Unknown")
    log_context = {"server_type": "bitbucket_app", "app_name": app_name}
    get_logger().debug(request.headers)

    # Always bind input_jwt: the background task closes over it, and previously a request without
    # an Authorization header left it undefined (NameError inside the task) while a header without
    # a space raised IndexError right here.
    input_jwt = None
    jwt_header = request.headers.get("authorization", None)
    if jwt_header:
        header_parts = jwt_header.split(" ", 1)
        if len(header_parts) == 2:
            input_jwt = header_parts[1].strip()
    data = await request.json()
    get_logger().debug(data)

    async def inner():
        try:
            # ignore bot users
            if is_bot_user(data):
                return "OK"

            # Check if the PR should be processed
            if data.get("event", "") == "pullrequest:created":
                if not should_process_pr_logic(data):
                    return "OK"

            # Get the username of the sender
            log_context["sender"] = _get_username(data)

            sender_id = data.get("data", {}).get("actor", {}).get("account_id", "")
            log_context["sender_id"] = sender_id

            if not input_jwt:
                get_logger().error("Missing or malformed Authorization header in Bitbucket webhook; ignoring event")
                return "OK"

            # Decode the (still unverified) claims only to learn which shared secret to verify with.
            jwt_parts = input_jwt.split(".")
            claim_part = jwt_parts[1]
            claim_part += "=" * (-len(claim_part) % 4)  # restore the base64 padding stripped by JWT encoding
            decoded_claims = base64.urlsafe_b64decode(claim_part)
            claims = json.loads(decoded_claims)
            client_key = claims["iss"]
            stored_secrets = json.loads(secret_provider.get_secret(client_key))
            shared_secret = stored_secrets["shared_secret"]
            # Raises if the signature or audience does not check out; nothing below runs in that case.
            jwt.decode(input_jwt, shared_secret, audience=client_key, algorithms=["HS256"])
            bearer_token = await get_bearer_token(shared_secret, client_key)
            context['bitbucket_bearer_token'] = bearer_token
            context["settings"] = copy.deepcopy(global_settings)
            event = data["event"]
            agent = PRAgent()
            if event == "pullrequest:created":
                pr_url = data["data"]["pullrequest"]["links"]["html"]["href"]
                log_context["api_url"] = pr_url
                log_context["event"] = "pull_request"
                if pr_url:
                    with get_logger().contextualize(**log_context):
                        if get_identity_provider().verify_eligibility("bitbucket",
                                                        sender_id, pr_url) is not Eligibility.NOT_ELIGIBLE:
                            if get_settings().get("bitbucket_app.pr_commands"):
                                await _perform_commands_bitbucket("pr_commands", agent, pr_url, log_context, data)
            elif event == "pullrequest:updated": # PR updated, might be from a push (we will validate this later)
                pr_url = data["data"]["pullrequest"]["links"]["html"]["href"]
                log_context["api_url"] = pr_url
                log_context["event"] = "pull_request"
                if pr_url:
                    with get_logger().contextualize(**log_context):
                        if get_identity_provider().verify_eligibility("bitbucket",
                                                        sender_id, pr_url) is not Eligibility.NOT_ELIGIBLE:
                            if get_settings().get("bitbucket_app.push_commands"):
                                await _perform_commands_bitbucket("push_commands", agent, pr_url, log_context, data)
            elif event == "pullrequest:comment_created":
                pr_url = data["data"]["pullrequest"]["links"]["html"]["href"]
                log_context["api_url"] = pr_url
                log_context["event"] = "comment"
                comment_body = data["data"]["comment"]["content"]["raw"]
                with get_logger().contextualize(**log_context):
                    if get_identity_provider().verify_eligibility("bitbucket",
                                                                     sender_id, pr_url) is not Eligibility.NOT_ELIGIBLE:
                        await agent.handle_request(pr_url, comment_body)
        except Exception as e:
            get_logger().error(f"Failed to handle webhook: {e}")
    background_tasks.add_task(inner)
    return "OK"

@router.get("/webhook")
async def handle_github_webhooks(request: Request, response: Response):
    """
    Answer GET /webhook with a fixed text confirming the server is reachable.

    NOTE(review): this function has the same name as the POST /webhook handler defined earlier in
    the module, so the module-level name ends up bound to this one - confirm nothing imports the
    POST handler by name.
    """
    online_message = "Webhook server online!"
    return online_message

@router.post("/installed")
async def handle_installed_webhooks(request: Request, response: Response):
    """
    Handle the "installed" lifecycle callback by persisting the installation's shared secret.

    The payload's "sharedSecret" and "clientKey" are stored as a JSON document through the
    secret provider, keyed by `principal.username`.

    Returns:
        None on success; a 500 JSON response with an error message when anything fails.
    """
    try:
        get_logger().info("handle_installed_webhooks")
        # Never write credentials to the logs: mask the Authorization header and the shared secret.
        redacted_headers = {
            name: ("<redacted>" if name.lower() == "authorization" else value)
            for name, value in request.headers.items()
        }
        get_logger().info(redacted_headers)
        data = await request.json()
        redacted_data = {
            key: ("<redacted>" if key == "sharedSecret" else value)
            for key, value in data.items()
        }
        get_logger().info(redacted_data)
        shared_secret = data["sharedSecret"]
        client_key = data["clientKey"]
        username = data["principal"]["username"]
        secrets = {
            "shared_secret": shared_secret,
            "client_key": client_key
        }
        # NOTE(review): the secret is stored under the principal's username, while the webhook
        # handler in this module looks it up by the JWT "iss" claim - confirm both resolve to
        # the same key in the configured secret provider.
        secret_provider.store_secret(username, json.dumps(secrets))
    except Exception as e:
        get_logger().error(f"Failed to register user: {e}")
        return JSONResponse({"error": "Unable to register user"}, status_code=500)

@router.post("/uninstalled")
async def handle_uninstalled_webhooks(request: Request, response: Response):
    """Handle the "uninstalled" lifecycle callback; the payload is only logged."""
    get_logger().info("handle_uninstalled_webhooks")

    uninstall_payload = await request.json()
    get_logger().info(uninstall_payload)


def start():
    """
    Pin the settings this server relies on, then serve `router` with uvicorn on $PORT (default 3000).
    """
    forced_settings = (
        ("CONFIG.PUBLISH_OUTPUT_PROGRESS", False),
        ("CONFIG.GIT_PROVIDER", "bitbucket"),
        ("PR_DESCRIPTION.PUBLISH_DESCRIPTION_AS_COMMENT", True),
    )
    for setting_key, setting_value in forced_settings:
        get_settings().set(setting_key, setting_value)

    app = FastAPI(middleware=[Middleware(RawContextMiddleware)])
    app.include_router(router)

    listen_port = int(os.getenv("PORT", "3000"))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)


if __name__ == '__main__':
    start()


================================================
FILE: pr_agent/servers/bitbucket_server_webhook.py
================================================
import ast
import json
import os
import re
from typing import List

import uvicorn
from fastapi import APIRouter, FastAPI
from fastapi.encoders import jsonable_encoder
from fastapi.responses import RedirectResponse
from starlette import status
from starlette.background import BackgroundTasks
from starlette.middleware import Middleware
from starlette.requests import Request
from starlette.responses import JSONResponse
from starlette_context.middleware import RawContextMiddleware

from pr_agent.agent.pr_agent import PRAgent
from pr_agent.algo.utils import update_settings_from_args
from pr_agent.config_loader import get_settings
from pr_agent.git_providers.utils import apply_repo_settings
from pr_agent.log import LoggingFormat, get_logger, setup_logger
from pr_agent.servers.utils import verify_signature

# Emit JSON-formatted logs; the level comes from CONFIG.LOG_LEVEL and falls back to "DEBUG".
setup_logger(fmt=LoggingFormat.JSON, level=get_settings().get("CONFIG.LOG_LEVEL", "DEBUG"))
# Router on which this module's webhook endpoints are registered.
router = APIRouter()


def handle_request(background_tasks: BackgroundTasks, url: str, body: str, log_context: dict):
    """
    Queue a PR-Agent command for execution after the HTTP response has been sent.

    Args:
        background_tasks: Task queue the coroutine is added to.
        url: URL of the pull request.
        body: Command text to run.
        log_context: Mutable logging context; "action" and "api_url" are written into it and it
            is applied while the command runs.

    Errors raised by the agent are logged inside the task and not propagated.
    """
    log_context.update(action=body, api_url=url)

    async def _run_agent():
        try:
            with get_logger().contextualize(**log_context):
                await PRAgent().handle_request(url, body)
        except Exception as err:
            get_logger().error(f"Failed to handle webhook: {err}")

    background_tasks.add_task(_run_agent)

def should_process_pr_logic(data) -> bool:
    """
    Apply the configured ignore filters to a Bitbucket Server pull-request webhook payload.

    Checks, in order: CONFIG.IGNORE_REPOSITORIES against "<project key>/<repo slug>",
    CONFIG.IGNORE_PR_AUTHORS against the PR author's user name, CONFIG.IGNORE_PR_TITLE against
    the title, CONFIG.IGNORE_PR_SOURCE_BRANCHES / CONFIG.IGNORE_PR_TARGET_BRANCHES against the
    branch display ids, and finally `config.allow_only_specific_folders` against the PR's
    changed files. Regex settings are applied with `re.search`.

    Returns:
        False when any filter rejects the PR; True otherwise, including when an exception
        occurs while evaluating the filters (the error is logged).
    """
    try:
        pr_data = data.get("pullRequest", {})
        title = pr_data.get("title", "")
        
        # The `if x else ...` guards below protect against keys that are present but hold None/empty.
        from_ref = pr_data.get("fromRef", {})
        source_branch = from_ref.get("displayId", "") if from_ref else ""
        
        to_ref = pr_data.get("toRef", {})
        target_branch = to_ref.get("displayId", "") if to_ref else ""
        
        author = pr_data.get("author", {})
        user = author.get("user", {}) if author else {}
        sender = user.get("name", "") if user else ""
        
        # Repository identity is taken from the target ref.
        repository = to_ref.get("repository", {}) if to_ref else {}
        project = repository.get("project", {}) if repository else {}
        project_key = project.get("key", "") if project else ""
        repo_slug = repository.get("slug", "") if repository else ""
        
        repo_full_name = f"{project_key}/{repo_slug}" if project_key and repo_slug else ""
        pr_id = pr_data.get("id", None)

        # To ignore PRs from specific repositories
        ignore_repos = get_settings().get("CONFIG.IGNORE_REPOSITORIES", [])
        if repo_full_name and ignore_repos:
            if any(re.search(regex, repo_full_name) for regex in ignore_repos):
                get_logger().info(f"Ignoring PR from repository '{repo_full_name}' due to 'config.ignore_repositories' setting")
                return False

        # To ignore PRs from specific users
        ignore_pr_users = get_settings().get("CONFIG.IGNORE_PR_AUTHORS", [])
        if ignore_pr_users and sender:
            if any(re.search(regex, sender) for regex in ignore_pr_users):
                get_logger().info(f"Ignoring PR from user '{sender}' due to 'config.ignore_pr_authors' setting")
                return False

        # To ignore PRs with specific titles
        if title:
            ignore_pr_title_re = get_settings().get("CONFIG.IGNORE_PR_TITLE", [])
            # A single pattern may be configured as a bare value instead of a list.
            if not isinstance(ignore_pr_title_re, list):
                ignore_pr_title_re = [ignore_pr_title_re]
            if ignore_pr_title_re and any(re.search(regex, title) for regex in ignore_pr_title_re):
                get_logger().info(f"Ignoring PR with title '{title}' due to config.ignore_pr_title setting")
                return False

        # To ignore PRs by source / target branch
        ignore_pr_source_branches = get_settings().get("CONFIG.IGNORE_PR_SOURCE_BRANCHES", [])
        ignore_pr_target_branches = get_settings().get("CONFIG.IGNORE_PR_TARGET_BRANCHES", [])
        if (ignore_pr_source_branches or ignore_pr_target_branches):
            if any(re.search(regex, source_branch) for regex in ignore_pr_source_branches):
                get_logger().info(
                    f"Ignoring PR with source branch '{source_branch}' due to config.ignore_pr_source_branches settings")
                return False
            if any(re.search(regex, target_branch) for regex in ignore_pr_target_branches):
                get_logger().info(
                    f"Ignoring PR with target branch '{target_branch}' due to config.ignore_pr_target_branches settings")
                return False

        # Allow_only_specific_folders
        allowed_folders = get_settings().config.get("allow_only_specific_folders", [])
        if allowed_folders and pr_id and project_key and repo_slug:
            # Imported lazily, only when this filter is actually configured.
            from pr_agent.git_providers.bitbucket_server_provider import BitbucketServerProvider
            bitbucket_server_url = get_settings().get("BITBUCKET_SERVER.URL", "")
            pr_url = f"{bitbucket_server_url}/projects/{project_key}/repos/{repo_slug}/pull-requests/{pr_id}"
            provider = BitbucketServerProvider(pr_url=pr_url)
            # NOTE(review): the prefix test below assumes get_files() yields path strings - confirm.
            changed_files = provider.get_files()
            if changed_files:
                # Check if ALL files are outside allowed folders
                all_files_outside = True
                for file_path in changed_files:
                    # Plain string-prefix match, not a path-component match.
                    if any(file_path.startswith(folder) for folder in allowed_folders):
                        all_files_outside = False
                        break
                
                if all_files_outside:
                    get_logger().info(f"Ignoring PR because all files {changed_files} are outside allowed folders {allowed_folders}")
                    return False
    except Exception as e:
        get_logger().error(f"Failed 'should_process_pr_logic': {e}")
        return True # On exception - we continue. Otherwise, we could just end up with filtering all PRs
    return True

@router.post("/")
async def redirect_to_webhook():
    """Redirect a POST on the root path to the "/webhook" endpoint."""
    webhook_path = "/webhook"
    return RedirectResponse(url=webhook_path)

@router.post("/webhook")
async def handle_webhook(background_tasks: BackgroundTasks, request: Request):
    """
    Receive a Bitbucket Server webhook and schedule the matching PR-Agent commands.

    Flow:
      1. Parse and log the JSON payload.
      2. When `BITBUCKET_SERVER.WEBHOOK_SECRET` is configured, answer the `{"test": true}`
         connection probe directly, otherwise verify the `x-hub-signature` header.
      3. Build the pull-request URL from the payload and select the commands to run:
           - `pr:opened`                               -> `BITBUCKET_SERVER.PR_COMMANDS`
           - `repo:refs_changed` carrying a pull request -> `BITBUCKET_SERVER.PUSH_COMMANDS`
             (only when `BITBUCKET_SERVER.HANDLE_PUSH_TRIGGER` is set)
           - `pr:comment:added`                        -> the comment text itself
      4. Run the commands in a background task and reply immediately.

    Args:
        background_tasks: FastAPI background task queue used to run the commands.
        request: the incoming webhook request.

    Returns:
        JSONResponse: 200 when the event was accepted or deliberately ignored,
        400 with `{"message": "Unsupported event"}` for events that are not handled
        (including payloads that carry no pull request).
    """
    log_context = {"server_type": "bitbucket_server"}
    data = await request.json()
    get_logger().info(json.dumps(data))

    webhook_secret = get_settings().get("BITBUCKET_SERVER.WEBHOOK_SECRET", None)
    if webhook_secret:
        body_bytes = await request.body()
        if body_bytes.decode('utf-8') == '{"test": true}':
            return JSONResponse(
                status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "connection test successful"})
            )
        signature_header = request.headers.get("x-hub-signature", None)
        verify_signature(body_bytes, webhook_secret, signature_header)

    pull_request = data.get("pullRequest")
    if not pull_request:
        # e.g. a push that is not attached to any pull request: nothing to act on.
        # Previously this surfaced as an unhandled KeyError (HTTP 500).
        get_logger().info("Webhook payload does not contain a pull request, ignoring", **log_context)
        return JSONResponse(
            status_code=status.HTTP_400_BAD_REQUEST,
            content=jsonable_encoder({"message": "Unsupported event"}),
        )

    pr_id = pull_request["id"]
    repository_name = pull_request["toRef"]["repository"]["slug"]
    project_name = pull_request["toRef"]["repository"]["project"]["key"]
    bitbucket_server = get_settings().get("BITBUCKET_SERVER.URL")
    pr_url = f"{bitbucket_server}/projects/{project_name}/repos/{repository_name}/pull-requests/{pr_id}"

    log_context["api_url"] = pr_url
    log_context["event"] = "pull_request"

    commands_to_run = []

    if (data["eventKey"] == "pr:opened"
            or (data["eventKey"] == "repo:refs_changed" and data.get("pullRequest", {}).get("id", -1) != -1)):  # push event; -1 for push unassigned to a PR: #Check auto commands for creation/updating
        # Repo-level settings must be loaded before the ignore filters and feature flags are read
        apply_repo_settings(pr_url)
        if not should_process_pr_logic(data):
            get_logger().info("PR ignored due to config settings", **log_context)
            return JSONResponse(
                status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "PR ignored by config"})
            )
        if get_settings().config.disable_auto_feedback:  # auto commands for PR, and auto feedback is disabled
            get_logger().info(f"Auto feedback is disabled, skipping auto commands for PR {pr_url}", **log_context)
            return JSONResponse(
                status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "PR ignored due to auto feedback not enabled"})
            )
        get_settings().set("config.is_auto_command", True)
        if data["eventKey"] == "pr:opened":
            commands_to_run.extend(_get_commands_list_from_settings('BITBUCKET_SERVER.PR_COMMANDS'))
        else:  # the only other event admitted by the condition above: "repo:refs_changed"
            if not get_settings().get("BITBUCKET_SERVER.HANDLE_PUSH_TRIGGER"):
                get_logger().info(f"Push trigger is disabled, skipping push commands for PR {pr_url}", **log_context)
                return JSONResponse(
                    status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "PR ignored due to push trigger not enabled"})
                )

            get_settings().set("config.is_new_pr", False)
            commands_to_run.extend(_get_commands_list_from_settings('BITBUCKET_SERVER.PUSH_COMMANDS'))
    elif data["eventKey"] == "pr:comment:added":
        commands_to_run.append(data["comment"]["text"])
    else:
        # Pass a dict, not a pre-serialized string: JSONResponse serializes `content` itself,
        # so a json.dumps() string would be encoded twice and arrive as a JSON string literal.
        return JSONResponse(
            status_code=status.HTTP_400_BAD_REQUEST,
            content=jsonable_encoder({"message": "Unsupported event"}),
        )

    async def inner():
        # Runs after the HTTP response was sent; failures can only be logged.
        try:
            await _run_commands_sequentially(commands_to_run, pr_url, log_context)
        except Exception as e:
            get_logger().error(f"Failed to handle webhook: {e}")

    background_tasks.add_task(inner)

    return JSONResponse(
        status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"})
    )


async def _run_commands_sequentially(commands: List[str], url: str, log_context: dict):
    """
    Execute the given commands one after another against the pull request at `url`.

    Each command is first normalised by `_process_command`, then handed to
    `PRAgent().handle_request` inside a logger context built from `log_context`
    (which is updated in place with the current `action` and `api_url`).
    A failure of one command is logged and does not stop the remaining ones.
    """
    get_logger().info(f"Running commands sequentially: {commands}")
    if commands is None:
        return

    for raw_command in commands:
        try:
            processed = _process_command(raw_command, url)
            log_context.update(action=processed, api_url=url)
            with get_logger().contextualize(**log_context):
                await PRAgent().handle_request(url, processed)
        except Exception as err:
            get_logger().error(f"Failed to handle command: {raw_command} , error: {err}")

def _process_command(command: str, url) -> str:
    """
    Apply repo settings for `url`, then rebuild `command` from its space-separated parts.

    The first token is kept as the command name; the remaining tokens go through
    `update_settings_from_args`, and whatever that call returns is re-attached after
    the command name, separated by single spaces.
    """
    # NOTE(review): the original author doubted this call is needed here - confirm before removing
    apply_repo_settings(url)

    command_name, *raw_args = command.split(" ")
    # NOTE(review): the original author wondered whether this belongs in PRAgent instead
    leftover_args = update_settings_from_args(raw_args)
    return ' '.join([command_name] + leftover_args)


def _to_list(command_string: str) -> list:
    try:
        # Use ast.literal_eval to safely parse the string into a list
        commands = ast.literal_eval(command_string)
        # Check if the parsed object is a list of strings
        if isinstance(commands, list) and all(isinstance(cmd, str) for cmd in commands):
            return commands
        else:
            raise ValueError("Parsed data is not a list of strings.")
    except (SyntaxError, ValueError, TypeError) as e:
        raise ValueError(f"Invalid command string: {e}")


def _get_commands_list_from_settings(setting_key:str ) -> list:
    """
    Read the list of commands stored under `setting_key`.

    Args:
        setting_key: dotted settings key, e.g. 'BITBUCKET_SERVER.PR_COMMANDS'.

    Returns:
        The configured value, or an empty list when the key is missing, is set to
        None, or reading it raises ValueError. Callers pass the result straight to
        `list.extend`, so None must never be returned.
    """
    try:
        commands = get_settings().get(setting_key, [])
    except ValueError as e:
        get_logger().error(f"Failed to get commands list from settings {setting_key}: {e}")
        return []
    return [] if commands is None else commands


@router.get("/")
async def root():
    """Answer GET requests on the root path with a static status payload."""
    return dict(status="ok")


def start():
    """
    Build the FastAPI application and serve it with uvicorn.

    The app is created with `RawContextMiddleware`, gets this module's router attached,
    and listens on all interfaces on the port named by the `PORT` environment
    variable (3000 when unset).
    """
    context_middleware = [Middleware(RawContextMiddleware)]
    app = FastAPI(middleware=context_middleware)
    app.include_router(router)
    listen_port = int(os.environ.get("PORT", "3000"))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)


# Start the webhook server when this module is executed as a script.
if __name__ == "__main__":
    start()


================================================
FILE: pr_agent/servers/gerrit_server.py
================================================
import copy
from enum import Enum
from json import JSONDecodeError

import uvicorn
from fastapi import APIRouter, FastAPI, HTTPException
from pydantic import BaseModel
from starlette.middleware import Middleware
from starlette_context import context
from starlette_context.middleware import RawContextMiddleware

from pr_agent.agent.pr_agent import PRAgent
from pr_agent.config_loader import get_settings, global_settings
from pr_agent.log import get_logger, setup_logger

setup_logger()
router = APIRouter()


class Action(str, Enum):
    """Values accepted for the `{action}` path segment of the Gerrit endpoint."""
    review = "review"
    describe = "describe"
    ask = "ask"
    improve = "improve"
    reflect = "reflect"
    answer = "answer"


class Item(BaseModel):
    """JSON body of a request to the Gerrit endpoint."""
    refspec: str  # joined with `project` as "<project>:<refspec>" to address the change
    project: str  # first half of the "<project>:<refspec>" identifier
    msg: str  # command text; forwarded to the agent stripped and prefixed with "/"


@router.post("/api/v1/gerrit/{action}")
async def handle_gerrit_request(action: Action, item: Item):
    """
    Handle a Gerrit request and forward its message to PR-Agent.

    A private copy of the global settings is stored in the request context, then
    `item.msg` (stripped and prefixed with "/") is passed to `PRAgent().handle_request`
    for the change identified by "<project>:<refspec>".

    Args:
        action: the action named in the URL path. It is only used to validate that an
            `ask` request carries a message; the command that runs comes from `item.msg`.
        item: the request body.

    Raises:
        HTTPException: 400 when `action` is `ask` and `item.msg` is empty.
    """
    get_logger().debug("Received a Gerrit request")
    context["settings"] = copy.deepcopy(global_settings)

    if action == Action.ask and not item.msg:
        # Must be raised, not returned: a returned HTTPException is treated as an ordinary
        # return value, so the request would not be rejected with a 400 and processing
        # would never stop here as intended.
        raise HTTPException(
            status_code=400,
            detail="msg is required for ask command"
        )
    await PRAgent().handle_request(
        f"{item.project}:{item.refspec}",
        f"/{item.msg.strip()}"
    )


async def get_body(request):
    """
    Parse the JSON body of `request`.

    Returns:
        The decoded body, or an empty dict when the body is not valid JSON
        (the decoding error is logged).
    """
    try:
        body = await request.json()
    except JSONDecodeError as e:
        # Put the error into the message itself: passed as a bare positional argument it
        # has no placeholder to land in and the detail is lost from the log.
        get_logger().error(f"Error parsing request body: {e}")
        return {}
    return body


@router.get("/")
async def root():
    """Answer GET requests on the root path with a static status payload."""
    return dict(status="ok")


def start():
    """
    Build the FastAPI application for the Gerrit endpoint and serve it with uvicorn
    on all interfaces, port 3000.
    """
    # CLI mode is switched on to prevent adding help messages with the output
    get_settings().set("CONFIG.CLI_MODE", True)

    app = FastAPI(middleware=[Middleware(RawContextMiddleware)])
    app.include_router(router)
    uvicorn.run(app, host="0.0.0.0", port=3000)


# Start the Gerrit server when this module is executed as a script.
if __name__ == '__main__':
    start()


================================================
FILE: pr_agent/servers/gitea_app.py
================================================
import copy
import os
import re
from typing import Any, Dict

from fastapi import APIRouter, FastAPI, HTTPException, Request, Response
from starlette.background import BackgroundTasks
from starlette.middleware import Middleware
from starlette_context import context
from starlette_context.middleware import RawContextMiddleware

from pr_agent.agent.pr_agent import PRAgent
from pr_agent.algo.utils import update_settings_from_args
from pr_agent.config_loader import get_settings, global_settings
from pr_agent.git_providers.utils import apply_repo_settings
from pr_agent.log import LoggingFormat, get_logger, setup_logger
from pr_agent.servers.utils import verify_signature

# Setup logging and router
setup_logger(fmt=LoggingFormat.JSON, level=get_settings().get("CONFIG.LOG_LEVEL", "DEBUG"))
router = APIRouter()

@router.post("/api/v1/gitea_webhooks")
async def handle_gitea_webhooks(background_tasks: BackgroundTasks, request: Request, response: Response):
    """
    Accept a Gitea webhook delivery.

    The body is parsed (and signature-checked) by `get_body`, a private copy of the
    global settings is placed in the request context, and the actual processing is
    queued as a background task so the endpoint can reply with an empty object at once.
    """
    get_logger().debug("Received a Gitea webhook")

    payload = await get_body(request)
    gitea_event = request.headers.get("X-Gitea-Event", None)

    # Per-request state: isolated settings copy and an empty git-provider slot
    context["settings"] = copy.deepcopy(global_settings)
    context["git_provider"] = {}

    background_tasks.add_task(handle_request, payload, event=gitea_event)
    return {}

async def get_body(request: Request):
    """
    Decode the JSON body of a Gitea webhook and, if a secret is configured, verify it.

    Returns:
        The decoded JSON body.

    Raises:
        HTTPException: 400 if the body cannot be parsed or the signature header is
            missing while a webhook secret is configured; 401 if verification fails.
    """
    try:
        payload = await request.json()
    except Exception as e:
        get_logger().error("Error parsing request body", artifact={'error': e})
        raise HTTPException(status_code=400, detail="Error parsing request body") from e

    secret = getattr(get_settings().gitea, 'webhook_secret', None)
    if not secret:
        # No secret configured: the body is accepted without verification
        return payload

    raw_body = await request.body()
    signature = request.headers.get('x-gitea-signature', None)
    if not signature:
        get_logger().error("Missing signature header")
        raise HTTPException(status_code=400, detail="Missing signature header")

    try:
        # The header value is prefixed with "sha256=" before being passed to verify_signature
        verify_signature(raw_body, secret, f"sha256={signature}")
    except Exception as ex:
        get_logger().error(f"Invalid signature: {ex}")
        raise HTTPException(status_code=401, detail="Invalid signature")

    return payload

async def handle_request(body: Dict[str, Any], event: str):
    """
    Dispatch a parsed Gitea webhook to the matching handler.

    - `pull_request` events that pass `should_process_pr_logic` and whose action is
      opened / reopened / synchronized go to `handle_pr_event`.
    - `issue_comment` events with action `created` go to `handle_comment_event`.
    Everything else is ignored. An empty dict is always returned.
    """
    action = body.get("action")
    if not action:
        get_logger().debug("No action found in request body")
        return {}

    agent = PRAgent()

    if event == "pull_request":
        if not should_process_pr_logic(body):
            get_logger().debug("Request ignored: PR logic filtering")
            return {}
        if action in ("opened", "reopened", "synchronized"):
            await handle_pr_event(body, event, action, agent)
    elif event == "issue_comment" and action == "created":
        await handle_comment_event(body, event, action, agent)

    return {}

async def handle_pr_event(body: Dict[str, Any], event: str, action: str, agent: PRAgent):
    """
    Run the configured automatic commands for a pull-request event.

    Opened / reopened PRs run the `pr_commands` set. A `synchronized` event (a push to
    the PR) runs `push_commands`, but only when `gitea.push_commands` is non-empty and
    `gitea.handle_push_trigger` is enabled. Payloads without a pull request or without
    its `url` are silently dropped.
    """
    pull_request = body.get("pull_request", {})
    api_url = pull_request.get("url") if pull_request else None
    if not api_url:
        return

    if action in ("opened", "reopened"):
        await _perform_commands_gitea("pr_commands", agent, body, api_url)
        return

    if action == "synchronized":
        push_commands = get_settings().get("gitea.push_commands", {})
        push_trigger_enabled = get_settings().get("gitea.handle_push_trigger", False)
        if not (push_commands and push_trigger_enabled):
            get_logger().info("Push event, but no push commands found or push trigger is disabled")
            return
        get_logger().debug(f'A push event has been received: {api_url}')
        await _perform_commands_gitea("push_commands", agent, body, api_url)

async def handle_comment_event(body: Dict[str, Any], event: str, action: str, agent: PRAgent):
    """
    Forward a slash-command comment to the agent.

    Only comments whose text starts with "/" are treated as commands, and only when
    the payload carries a `pull_request` entry with a `url`; anything else is ignored.
    """
    comment = body.get("comment", {})
    comment_text = comment.get("body", "") if comment else ""
    is_command = bool(comment_text) and comment_text.startswith("/")
    if not is_command:
        return

    pr_url = body.get("pull_request", {}).get("url")
    if pr_url:
        await agent.handle_request(pr_url, comment_text)

async def _perform_commands_gitea(commands_conf: str, agent: PRAgent, body: dict, api_url: str):
    """
    Run the automatic commands configured under `gitea.<commands_conf>` for one PR.

    Repo settings are applied first, so every later settings lookup sees the
    repository's own configuration. Nothing runs when auto feedback is disabled
    (for `pr_commands`), when the PR is filtered out by `should_process_pr_logic`,
    or when no commands are configured.
    """
    apply_repo_settings(api_url)

    auto_feedback_disabled = commands_conf == "pr_commands" and get_settings().config.disable_auto_feedback
    if auto_feedback_disabled:  # auto commands for PR, and auto feedback is disabled
        get_logger().info(f"Auto feedback is disabled, skipping auto commands for PR {api_url=}")
        return

    # Re-checked here because the configuration now includes the repo settings
    if not should_process_pr_logic(body):
        return {}

    configured_commands = get_settings().get(f"gitea.{commands_conf}")
    if not configured_commands:
        get_logger().info("New PR, but no auto commands configured")
        return

    get_settings().set("config.is_auto_command", True)
    for configured in configured_commands:
        command_name, *raw_args = configured.split(" ")
        leftover_args = update_settings_from_args(raw_args)
        new_command = ' '.join([command_name] + leftover_args)
        get_logger().info(f"{commands_conf}. Performing auto command '{new_command}', for {api_url=}")
        await agent.handle_request(api_url, new_command)

def should_process_pr_logic(body) -> bool:
    """
    Decide whether the pull request described by a Gitea webhook payload should be processed.

    The payload is checked against the ignore filters of the configuration, in this order:
    `CONFIG.IGNORE_REPOSITORIES`, `CONFIG.IGNORE_PR_AUTHORS`, `CONFIG.IGNORE_PR_TITLE`,
    `CONFIG.IGNORE_PR_LABELS`, `CONFIG.IGNORE_PR_SOURCE_BRANCHES` and
    `CONFIG.IGNORE_PR_TARGET_BRANCHES`. Repository, author, title and branch filters are
    regular expressions applied with `re.search`; labels are compared by exact name.

    Args:
        body: the parsed webhook payload (a dict).

    Returns:
        False as soon as one filter matches; True otherwise. If anything raises while
        evaluating the filters, the error is logged and True is returned.
    """
    try:
        pull_request = body.get("pull_request", {})
        title = pull_request.get("title", "")
        pr_labels = pull_request.get("labels", [])
        source_branch = pull_request.get("head", {}).get("ref", "")
        target_branch = pull_request.get("base", {}).get("ref", "")
        sender = body.get("sender", {}).get("login")
        repo_full_name = body.get("repository", {}).get("full_name", "")

        # logic to ignore PRs from specific repositories
        ignore_repos = get_settings().get("CONFIG.IGNORE_REPOSITORIES", [])
        if ignore_repos and repo_full_name:
            if any(re.search(regex, repo_full_name) for regex in ignore_repos):
                get_logger().info(f"Ignoring PR from repository '{repo_full_name}' due to 'config.ignore_repositories' setting")
                return False

        # logic to ignore PRs from specific users
        ignore_pr_users = get_settings().get("CONFIG.IGNORE_PR_AUTHORS", [])
        if ignore_pr_users and sender:
            if any(re.search(regex, sender) for regex in ignore_pr_users):
                get_logger().info(f"Ignoring PR from user '{sender}' due to 'config.ignore_pr_authors' setting")
                return False

        # logic to ignore PRs with specific titles
        if title:
            ignore_pr_title_re = get_settings().get("CONFIG.IGNORE_PR_TITLE", [])
            # a single (non-list) value is accepted and wrapped into a list
            if not isinstance(ignore_pr_title_re, list):
                ignore_pr_title_re = [ignore_pr_title_re]
            if ignore_pr_title_re and any(re.search(regex, title) for regex in ignore_pr_title_re):
                get_logger().info(f"Ignoring PR with title '{title}' due to config.ignore_pr_title setting")
                return False

        # logic to ignore PRs with specific labels (exact name match, not a regex)
        ignore_pr_labels = get_settings().get("CONFIG.IGNORE_PR_LABELS", [])
        if pr_labels and ignore_pr_labels:
            labels = [label['name'] for label in pr_labels]
            if any(label in ignore_pr_labels for label in labels):
                labels_str = ", ".join(labels)
                get_logger().info(f"Ignoring PR with labels '{labels_str}' due to config.ignore_pr_labels settings")
                return False

        # logic to ignore PRs with specific source or target branches
        ignore_pr_source_branches = get_settings().get("CONFIG.IGNORE_PR_SOURCE_BRANCHES", [])
        ignore_pr_target_branches = get_settings().get("CONFIG.IGNORE_PR_TARGET_BRANCHES", [])
        if pull_request and (ignore_pr_source_branches or ignore_pr_target_branches):
            if any(re.search(regex, source_branch) for regex in ignore_pr_source_branches):
                get_logger().info(
                    f"Ignoring PR with source branch '{source_branch}' due to config.ignore_pr_source_branches settings")
                return False
            if any(re.search(regex, target_branch) for regex in ignore_pr_target_branches):
                get_logger().info(
                    f"Ignoring PR with target branch '{target_branch}' due to config.ignore_pr_target_branches settings")
                return False
    except Exception as e:
        # A failing filter is only logged; execution falls through to `return True`
        get_logger().error(f"Failed 'should_process_pr_logic': {e}")
    return True

# FastAPI app setup: module-level application object, created with the starlette-context
# middleware and this module's router attached. `start()` below serves this object.
middleware = [Middleware(RawContextMiddleware)]
app = FastAPI(middleware=middleware)
app.include_router(router)

def start():
    """
    Serve the module-level Gitea webhook app with uvicorn on all interfaces.

    The port comes from the `PORT` environment variable and defaults to 3000.
    """
    listen_port = int(os.environ.get("PORT", "3000"))
    import uvicorn  # imported here rather than at module level, as in the original
    uvicorn.run(app, host="0.0.0.0", port=listen_port)

# Start the Gitea webhook server when this module is executed as a script.
if __name__ == "__main__":
    start()


================================================
FILE: pr_agent/servers/github_action_runner.py
================================================
import asyncio
import json
import os
from typing import Union

from pr_agent.agent.pr_agent import PRAgent
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import get_git_provider
from pr_agent.git_providers.utils import apply_repo_settings
from pr_agent.log import get_logger
from pr_agent.servers.github_app import handle_line_comments
from pr_agent.tools.pr_code_suggestions import PRCodeSuggestions
from pr_agent.tools.pr_description import PRDescription
from pr_agent.tools.pr_reviewer import PRReviewer


def is_true(value: Union[str, bool]) -> bool:
    """
    Interpret a setting value as a boolean.

    Booleans are returned unchanged. A string counts as true only when it equals
    "true", compared case-insensitively. Any other type yields False.
    """
    if isinstance(value, bool):
        return value
    return isinstance(value, str) and value.lower() == 'true'


def get_setting_or_env(key: str, default: Union[str, bool] = None) -> Union[str, bool]:
    """
    Look up `key` in the settings, falling back to environment variables.

    The environment is consulted only when the settings lookup raises AttributeError.
    In that case the variable is searched under `key` as given, then upper-cased, then
    lower-cased; the first non-empty value wins, otherwise `default` is returned.
    """
    try:
        return get_settings().get(key, default)
    except AttributeError:  # TBD still need to debug why this happens on GitHub Actions
        env_value = os.getenv(key, None)
        if not env_value:
            env_value = os.getenv(key.upper(), None)
        if not env_value:
            env_value = os.getenv(key.lower(), None)
        return env_value or default


async def run_action():
    """
    Entry point of the GitHub Action: read the triggering event and run the matching tools.

    Steps:
      1. Read `GITHUB_EVENT_NAME`, `GITHUB_EVENT_PATH`, `GITHUB_TOKEN` (all required; the
         function prints a message and returns if one is missing) and the optional
         OpenAI key / org from the environment, and copy them into the settings.
      2. Load the event payload from the JSON file at `GITHUB_EVENT_PATH`.
      3. Apply repo settings when the payload carries a pull request `html_url`.
      4. If `config.response_language` is not 'en-us', append a language instruction to the
         `extra_instructions` of the pr_description / pr_code_suggestions / pr_reviewer settings.
      5. Dispatch on the event name:
         - `pull_request` / `pull_request_target`: when the action is listed in
           `GITHUB_ACTION_CONFIG.PR_ACTIONS`, run describe, review and improve; each one
           runs unless its auto_* setting is set to something other than true.
         - `issue_comment` / `pull_request_review_comment` (created or edited): forward the
           comment text to `PRAgent().handle_request`.

    Failures in steps 3 and 4 are logged and do not stop the run.
    """
    # Get environment variables
    GITHUB_EVENT_NAME = os.environ.get('GITHUB_EVENT_NAME')
    GITHUB_EVENT_PATH = os.environ.get('GITHUB_EVENT_PATH')
    OPENAI_KEY = os.environ.get('OPENAI_KEY') or os.environ.get('OPENAI.KEY')
    OPENAI_ORG = os.environ.get('OPENAI_ORG') or os.environ.get('OPENAI.ORG')
    GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN')
    # get_settings().set("CONFIG.PUBLISH_OUTPUT_PROGRESS", False)

    # Check if required environment variables are set
    if not GITHUB_EVENT_NAME:
        print("GITHUB_EVENT_NAME not set")
        return
    if not GITHUB_EVENT_PATH:
        print("GITHUB_EVENT_PATH not set")
        return
    if not GITHUB_TOKEN:
        print("GITHUB_TOKEN not set")
        return

    # Set the environment variables in the settings
    if OPENAI_KEY:
        get_settings().set("OPENAI.KEY", OPENAI_KEY)
    else:
        # Might not be set if the user is using models not from OpenAI
        print("OPENAI_KEY not set")
    if OPENAI_ORG:
        get_settings().set("OPENAI.ORG", OPENAI_ORG)
    get_settings().set("GITHUB.USER_TOKEN", GITHUB_TOKEN)
    get_settings().set("GITHUB.DEPLOYMENT_TYPE", "user")
    enable_output = get_setting_or_env("GITHUB_ACTION_CONFIG.ENABLE_OUTPUT", True)
    get_settings().set("GITHUB_ACTION_CONFIG.ENABLE_OUTPUT", enable_output)

    # Load the event payload
    # NOTE(review): only malformed JSON is handled here; a missing/unreadable file propagates
    try:
        with open(GITHUB_EVENT_PATH, 'r') as f:
            event_payload = json.load(f)
    except json.decoder.JSONDecodeError as e:
        print(f"Failed to parse JSON: {e}")
        return

    # Apply repo-level settings (best effort: a failure is logged and the run continues)
    try:
        get_logger().info("Applying repo settings")
        pr_url = event_payload.get("pull_request", {}).get("html_url")
        if pr_url:
            apply_repo_settings(pr_url)
            get_logger().info(f"enable_custom_labels: {get_settings().config.enable_custom_labels}")
    except Exception as e:
        get_logger().info(f"github action: failed to apply repo settings: {e}")

    # Append the response language in the extra instructions
    try:
        response_language = get_settings().config.get('response_language', 'en-us')
        if response_language.lower() != 'en-us':
            get_logger().info(f'User has set the response language to: {response_language}')

            lang_instruction_text = f"Your response MUST be written in the language corresponding to locale code: '{response_language}'. This is crucial."
            separator_text = "\n======\n\nIn addition, "

            # Walk every top-level settings section and patch only the three tool sections
            for key in get_settings():
                setting = get_settings().get(key)
                if str(type(setting)) == "<class 'dynaconf.utils.boxing.DynaBox'>":
                    if key.lower() in ['pr_description', 'pr_code_suggestions', 'pr_reviewer']:
                        if hasattr(setting, 'extra_instructions'):
                            extra_instructions = setting.extra_instructions

                            # Skip sections that already contain the language instruction
                            if lang_instruction_text not in str(extra_instructions):
                                updated_instructions = (
                                    str(extra_instructions) + separator_text + lang_instruction_text
                                    if extra_instructions else lang_instruction_text
                                )
                                setting.extra_instructions = updated_instructions
    except Exception as e:
        get_logger().info(f"github action: failed to apply language-specific instructions: {e}")
    # Handle pull request events (only the actions listed in PR_ACTIONS trigger the tools)
    if GITHUB_EVENT_NAME == "pull_request" or GITHUB_EVENT_NAME == "pull_request_target":
        action = event_payload.get("action")

        # Retrieve the list of actions from the configuration
        pr_actions = get_settings().get("GITHUB_ACTION_CONFIG.PR_ACTIONS", ["opened", "reopened", "ready_for_review", "review_requested"])

        if action in pr_actions:
            pr_url = event_payload.get("pull_request", {}).get("url")
            if pr_url:
                # legacy - supporting both GITHUB_ACTION and GITHUB_ACTION_CONFIG
                auto_review = get_setting_or_env("GITHUB_ACTION.AUTO_REVIEW", None)
                if auto_review is None:
                    auto_review = get_setting_or_env("GITHUB_ACTION_CONFIG.AUTO_REVIEW", None)
                auto_describe = get_setting_or_env("GITHUB_ACTION.AUTO_DESCRIBE", None)
                if auto_describe is None:
                    auto_describe = get_setting_or_env("GITHUB_ACTION_CONFIG.AUTO_DESCRIBE", None)
                auto_improve = get_setting_or_env("GITHUB_ACTION.AUTO_IMPROVE", None)
                if auto_improve is None:
                    auto_improve = get_setting_or_env("GITHUB_ACTION_CONFIG.AUTO_IMPROVE", None)

                # Set the configuration for auto actions
                get_settings().config.is_auto_command = True  # Set the flag to indicate that the command is auto
                get_settings().pr_description.final_update_message = False  # No final update message when auto_describe is enabled
                get_logger().info(f"Running auto actions: auto_describe={auto_describe}, auto_review={auto_review}, auto_improve={auto_improve}")

                # invoke by default all three tools (an unset flag counts as enabled)
                if auto_describe is None or is_true(auto_describe):
                    await PRDescription(pr_url).run()
                if auto_review is None or is_true(auto_review):
                    await PRReviewer(pr_url).run()
                if auto_improve is None or is_true(auto_improve):
                    await PRCodeSuggestions(pr_url).run()
        else:
            get_logger().info(f"Skipping action: {action}")

    # Handle issue comment event
    elif GITHUB_EVENT_NAME == "issue_comment" or GITHUB_EVENT_NAME == "pull_request_review_comment":
        action = event_payload.get("action")
        if action in ["created", "edited"]:
            comment_body = event_payload.get("comment", {}).get("body")
            try:
                # '/ask' on a review comment is rewritten by handle_line_comments first
                if GITHUB_EVENT_NAME == "pull_request_review_comment":
                    if '/ask' in comment_body:
                        comment_body = handle_line_comments(event_payload, comment_body)
            except Exception as e:
                get_logger().error(f"Failed to handle line comments: {e}")
                return
            if comment_body:
                is_pr = False
                disable_eyes = False
                # check if issue is pull request
                if event_payload.get("issue", {}).get("pull_request"):
                    url = event_payload.get("issue", {}).get("pull_request", {}).get("url")
                    is_pr = True
                elif event_payload.get("comment", {}).get("pull_request_url"):  # for 'pull_request_review_comment
                    url = event_payload.get("comment", {}).get("pull_request_url")
                    is_pr = True
                    disable_eyes = True
                else:
                    # plain issue (not a pull request): fall back to the issue URL
                    url = event_payload.get("issue", {}).get("url")

                if url:
                    # NOTE(review): the whole comment is lower-cased, including any free text
                    # after the command - confirm this is intended
                    body = comment_body.strip().lower()
                    comment_id = event_payload.get("comment", {}).get("id")
                    provider = get_git_provider()(pr_url=url)
                    if is_pr:
                        await PRAgent().handle_request(
                            url, body, notify=lambda: provider.add_eyes_reaction(
                                comment_id, disable_eyes=disable_eyes
                            )
                        )
                    else:
                        await PRAgent().handle_request(url, body)


# Run the action once when this module is executed as a script.
if __name__ == '__main__':
    asyncio.run(run_action())


================================================
FILE: pr_agent/servers/github_app.py
================================================
import asyncio.locks
import copy
import os
import re
import uuid
from typing import Any, Dict, Tuple

import uvicorn
from fastapi import APIRouter, FastAPI, HTTPException, Request, Response
from starlette.background import BackgroundTasks
from starlette.middleware import Middleware
from starlette_context import context
from starlette_context.middleware import RawContextMiddleware

from pr_agent.agent.pr_agent import PRAgent
from pr_agent.algo.utils import update_settings_from_args
from pr_agent.config_loader import get_settings, global_settings
from pr_agent.git_providers import (get_git_provider,
                                    get_git_provider_with_context)
from pr_agent.git_providers.git_provider import IncrementalPR
from pr_agent.git_providers.utils import apply_repo_settings
from pr_agent.identity_providers import get_identity_provider
from pr_agent.identity_providers.identity_provider import Eligibility
from pr_agent.log import LoggingFormat, get_logger, setup_logger
from pr_agent.servers.utils import DefaultDictWithTimeout, verify_signature

# Module-level setup: JSON logging at the configured level (DEBUG when CONFIG.LOG_LEVEL is unset)
setup_logger(fmt=LoggingFormat.JSON, level=get_settings().get("CONFIG.LOG_LEVEL", "DEBUG"))
# `base_path` is the directory one level above the directory containing this file
base_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
build_number_path = os.path.join(base_path, "build_number.txt")
# Read the build number from build_number.txt if the file exists, otherwise use "unknown"
if os.path.exists(build_number_path):
    with open(build_number_path) as f:
        build_number = f.read().strip()
else:
    build_number = "unknown"
router = APIRouter()


@router.post("/api/v1/github_webhooks")
async def handle_github_webhooks(background_tasks: BackgroundTasks, request: Request, response: Response):
    """
    Endpoint for incoming GitHub webhook deliveries.

    The body is parsed (and signature-checked) by `get_body`. The installation id, a
    private copy of the global settings and an empty git-provider slot are stored in
    the request context, and `handle_request` is queued as a background task together
    with the `X-GitHub-Event` header value. The endpoint itself replies with an empty object.
    """
    get_logger().debug("Received a GitHub webhook")

    payload = await get_body(request)

    context["installation_id"] = payload.get("installation", {}).get("id")
    context["settings"] = copy.deepcopy(global_settings)
    context["git_provider"] = {}

    github_event = request.headers.get("X-GitHub-Event", None)
    background_tasks.add_task(handle_request, payload, event=github_event)
    return {}


@router.post("/api/v1/marketplace_webhooks")
async def handle_marketplace_webhooks(request: Request, response: Response):
    """Parse (and signature-check) a marketplace webhook via `get_body` and log its body."""
    payload = await get_body(request)
    get_logger().info(f'Request body:\n{payload}')


async def get_body(request):
    """
    Decode the JSON body of a GitHub webhook and, if a secret is configured, verify it.

    Returns:
        The decoded JSON body.

    Raises:
        HTTPException: 400 if the body cannot be parsed as JSON.
        Whatever `verify_signature` raises when the `x-hub-signature-256` header does not
        match the configured `github.webhook_secret`.
    """
    try:
        payload = await request.json()
    except Exception as e:
        get_logger().error("Error parsing request body", artifact={'error': e})
        raise HTTPException(status_code=400, detail="Error parsing request body") from e

    secret = getattr(get_settings().github, 'webhook_secret', None)
    if not secret:
        # No secret configured: the body is accepted without verification
        return payload

    raw_body = await request.body()
    signature = request.headers.get('x-hub-signature-256', None)
    verify_signature(raw_body, secret, signature)
    return payload


# Module-level state whose entries use the TTL from `github_app.push_trigger_pending_tasks_ttl`.
# NOTE(review): presumably used by the push-trigger handling to track duplicate push events
# and to coordinate pending tasks per PR - confirm against the code that reads them.
_duplicate_push_triggers = DefaultDictWithTimeout(ttl=get_settings().github_app.push_trigger_pending_tasks_ttl)
_pending_task_duplicate_push_conditions = DefaultDictWithTimeout(asyncio.locks.Condition, ttl=get_settings().github_app.push_trigger_pending_tasks_ttl)

async def handle_comments_on_pr(body: Dict[str, Any],
                                event: str,
                                sender: str,
                                sender_id: str,
                                action: str,
                                log_context: Dict[str, Any],
                                agent: PRAgent):
    """
    Handle a comment webhook and, if it carries a command, pass it to the agent.

    A comment is treated as a command when it starts with "/" (leading whitespace is
    ignored). One exception: a comment that starts with a quoted image ("> ![image]")
    and contains "/ask" is rewritten so that the "/ask" part comes first.
    The PR API URL is taken from `issue.pull_request.url` or, for comments that carry
    `pull_request_url`, from the comment itself; payloads with neither are ignored.
    The command runs only if the identity provider does not report the sender as
    NOT_ELIGIBLE.

    Args:
        body: parsed webhook payload.
        event, action: accepted for a uniform handler signature; not read here.
        sender: sender name, used only in the "not eligible" log message.
        sender_id: identifier passed to the identity provider's eligibility check.
        log_context: logging context; `api_url` is added to it in place.
        agent: the agent that executes the command.

    Returns:
        An empty dict when the payload is ignored early, otherwise None.
    """
    if "comment" not in body:
        return {}
    comment_body = body.get("comment", {}).get("body")
    if comment_body and isinstance(comment_body, str) and not comment_body.lstrip().startswith("/"):
        if '/ask' in comment_body and comment_body.strip().startswith('> ![image]'):
            # Move the '/ask ...' part to the front and append the quoted image after it
            comment_body_split = comment_body.split('/ask')
            comment_body = '/ask' + comment_body_split[1] +' \n' +comment_body_split[0].strip().lstrip('>')
            get_logger().info(f"Reformatting comment_body so command is at the beginning: {comment_body}")
        else:
            get_logger().info("Ignoring comment not starting with /")
            return {}
    disable_eyes = False
    if "issue" in body and "pull_request" in body["issue"] and "url" in body["issue"]["pull_request"]:
        api_url = body["issue"]["pull_request"]["url"]
    elif "comment" in body and "pull_request_url" in body["comment"]:
        api_url = body["comment"]["pull_request_url"]
        try:
            if ('/ask' in comment_body and
                    'subject_type' in body["comment"] and body["comment"]["subject_type"] == "line"):
                # comment on a code line in the "files changed" tab
                comment_body = handle_line_comments(body, comment_body)
                disable_eyes = True
        except Exception as e:
            # NOTE(review): the message below mentions "log context", but the guarded code
            # is the line-comment handling - the wording looks copied from elsewhere
            get_logger().error("Failed to get log context", artifact={'error': e})
    else:
        return {}
    log_context["api_url"] = api_url
    comment_id = body.get("comment", {}).get("id")
    provider = get_git_provider_with_context(pr_url=api_url)
    with get_logger().contextualize(**log_context):
        if get_identity_provider().verify_eligibility("github", sender_id, api_url) is not Eligibility.NOT_ELIGIBLE:
            get_logger().info(f"Processing comment on PR {api_url=}, comment_body={comment_body}")
            # `notify` is handed to the agent as a callback that adds the eyes reaction to the comment
            await agent.handle_request(api_url, comment_body,
                        notify=lambda: provider.add_eyes_reaction(comment_id, disable_eyes=disable_eyes))
        else:
            get_logger().info(f"User {sender=} is not eligible to process comment on PR {api_url=}")

async def handle_new_pr_opened(body: Dict[str, Any],
                               event: str,
                               sender: str,
                               sender_id: str,
                               action: str,
                               log_context: Dict[str, Any],
                               agent: PRAgent):
    """
    Run the configured automatic "pr_commands" for a `pull_request` webhook event.

    Args:
        body: The webhook payload.
        event: The GitHub event type (not used here; kept for a uniform handler signature).
        sender: Login of the user that triggered the event (used for logging only).
        sender_id: Id of the triggering user, passed to the identity provider.
        action: The webhook action, e.g. "opened", "reopened", "ready_for_review".
        log_context: Logging context; `_check_pull_request_event` stores the PR api url in it.
        agent: The PRAgent instance that executes the commands.

    Returns:
        An empty dict when the event is rejected as invalid, otherwise None.
    """
    # Validate the payload first: no field of `body["pull_request"]` is read before this check,
    # so a missing or null pull request is rejected here instead of raising.
    pull_request, api_url = _check_pull_request_event(action, body, log_context)
    if not (pull_request and api_url):
        get_logger().info(f"Invalid PR event: {action=} {api_url=}")
        return {}
    if action in get_settings().github_app.handle_pr_actions:  # ['opened', 'reopened', 'ready_for_review']
        # Title/label/branch filtering is applied by `should_process_pr_logic`,
        # which `_perform_auto_commands_github` calls after the repo settings are loaded.
        apply_repo_settings(api_url)
        if get_identity_provider().verify_eligibility("github", sender_id, api_url) is not Eligibility.NOT_ELIGIBLE:
            await _perform_auto_commands_github("pr_commands", agent, body, api_url, log_context)
        else:
            get_logger().info(f"User {sender=} is not eligible to process PR {api_url=}")

async def handle_push_trigger_for_new_commits(body: Dict[str, Any],
                        event: str,
                        sender: str,
                        sender_id: str,
                        action: str,
                        log_context: Dict[str, Any],
                        agent: PRAgent):
    """
    Run the configured "push_commands" when new commits are pushed to an open PR.

    The event is dropped (an empty dict is returned) when the payload is not a valid PR event,
    when `github_app.handle_push_trigger` is disabled, when `before` equals `after`, or when the
    pushed commit is the PR's merge commit and `push_trigger_ignore_merge_commits` is set.

    Concurrent events for the same PR are throttled through the module-level
    `_duplicate_push_triggers` counter and `_pending_task_duplicate_push_conditions`
    (used here via `async with` / `wait()` / `notify()`; presumably asyncio.Condition objects
    keyed by api url - confirm at their definition).

    Args:
        body: The webhook payload.
        event: The GitHub event type (used for logging only).
        sender: Login of the triggering user (not used in this function).
        sender_id: Id of the triggering user, passed to the identity provider.
        action: The webhook action.
        log_context: Logging context, forwarded to the auto-command runner.
        agent: The PRAgent instance that executes the commands.
    """
    pull_request, api_url = _check_pull_request_event(action, body, log_context)
    if not (pull_request and api_url):
        return {}

    apply_repo_settings(api_url) # we need to apply the repo settings to get the correct settings for the PR. This is quite expensive - a call to the git provider is made for each PR event.
    if not get_settings().github_app.handle_push_trigger:
        return {}

    # TODO: do we still want to get the list of commits to filter bot/merge commits?
    before_sha = body.get("before")
    after_sha = body.get("after")
    merge_commit_sha = pull_request.get("merge_commit_sha")
    if before_sha == after_sha:
        return {}
    if get_settings().github_app.push_trigger_ignore_merge_commits and after_sha == merge_commit_sha:
        return {}

    # Prevent triggering multiple times for subsequent push triggers when one is enough:
    # The first push will trigger the processing, and if there's a second push in the meanwhile it will wait.
    # Any more events will be discarded, because they will all trigger the exact same processing on the PR.
    # We let the second event wait instead of discarding it because while the first event was being processed,
    # more commits may have been pushed that led to the subsequent events,
    # so we keep just one waiting as a delegate to trigger the processing for the new commits when done waiting.
    # `current_active_tasks` is the count observed BEFORE this task registers itself (0 = first, 1 = second).
    current_active_tasks = _duplicate_push_triggers.setdefault(api_url, 0)
    max_active_tasks = 2 if get_settings().github_app.push_trigger_pending_tasks_backlog else 1
    if current_active_tasks < max_active_tasks:
        # first task can enter, and second tasks too if backlog is enabled
        get_logger().info(
            f"Continue processing push trigger for {api_url=} because there are {current_active_tasks} active tasks"
        )
        _duplicate_push_triggers[api_url] += 1
    else:
        get_logger().info(
            f"Skipping push trigger for {api_url=} because another event already triggered the same processing"
        )
        return {}
    async with _pending_task_duplicate_push_conditions[api_url]:
        if current_active_tasks == 1:
            # second task waits
            get_logger().info(
                f"Waiting to process push trigger for {api_url=} because the first task is still in progress"
            )
            await _pending_task_duplicate_push_conditions[api_url].wait()
            get_logger().info(f"Finished waiting to process push trigger for {api_url=} - continue with flow")

    try:
        # Ineligible users are skipped without a log message; the finally block still runs.
        if get_identity_provider().verify_eligibility("github", sender_id, api_url) is not Eligibility.NOT_ELIGIBLE:
            get_logger().info(f"Performing incremental review for {api_url=} because of {event=} and {action=}")
            await _perform_auto_commands_github("push_commands", agent, body, api_url, log_context)

    finally:
        # release the waiting task block
        # (wake at most one waiter and unregister this task from the active counter)
        async with _pending_task_duplicate_push_conditions[api_url]:
            _pending_task_duplicate_push_conditions[api_url].notify(1)
            _duplicate_push_triggers[api_url] -= 1


def handle_closed_pr(body, event, action, log_context):
    """
    Log PR statistics for a closed pull request, but only if it was merged.

    The PR api url is stored in `log_context["api_url"]` before logging. PRs that were
    closed without being merged are ignored. `event` and `action` are not used.
    """
    pr_payload = body.get("pull_request", {})
    if pr_payload.get("merged", False):
        api_url = pr_payload.get("url", "")
        provider = get_git_provider()(pr_url=api_url)
        stats = provider.calc_pr_statistics(pr_payload)
        log_context["api_url"] = api_url
        get_logger().info("PR-Agent statistics for closed PR", analytics=True, pr_statistics=stats, **log_context)


def get_log_context(body, event, action, build_number):
    """
    Build the logging context for a webhook request and extract the sender details.

    Returns:
        A tuple `(log_context, sender, sender_id, sender_type)`. If anything fails while
        reading the payload, the error is logged and `log_context` is an empty dict; the
        sender fields keep whatever was extracted before the failure (empty strings initially).
    """
    sender, sender_id, sender_type = "", "", ""
    try:
        sender_info = body.get("sender", {})
        sender = sender_info.get("login")
        sender_id = sender_info.get("id")
        sender_type = sender_info.get("type")
        repo_name = body.get("repository", {}).get("full_name", "")
        organization = body.get("organization", {}).get("login", "")
        installation = body.get("installation", {}).get("id", "")
        application = get_settings().get("CONFIG.APP_NAME", "Unknown")
        log_context = {
            "action": action,
            "event": event,
            "sender": sender,
            "server_type": "github_app",
            "request_id": uuid.uuid4().hex,  # fresh id per request
            "build_number": build_number,
            "app_name": application,
            "repo": repo_name,
            "git_org": organization,
            "installation_id": installation,
        }
    except Exception as e:
        get_logger().error(f"Failed to get log context", artifact={'error': e})
        log_context = {}
    return log_context, sender, sender_id, sender_type


def is_bot_user(sender, sender_type):
    """
    Tell whether the request should be ignored because it was sent by a bot.

    Returns True only when the `GITHUB_APP.IGNORE_BOT_PR` setting is truthy and
    `sender_type` is "Bot". Any exception is logged and treated as "not a bot".
    """
    try:
        ignore_bots = get_settings().get("GITHUB_APP.IGNORE_BOT_PR", False)
        if not (ignore_bots and sender_type == "Bot"):
            return False
        # stay quiet for senders whose login contains 'pr-agent'
        if 'pr-agent' not in sender:
            get_logger().info(f"Ignoring PR from '{sender=}' because it is a bot")
        return True
    except Exception as e:
        get_logger().error(f"Failed 'is_bot_user' logic: {e}")
    return False


def should_process_pr_logic(body) -> bool:
    """
    Decide whether a PR webhook payload passes the configured ignore filters.

    Filters are checked in this order: repository, PR author (the event sender), title,
    labels, source branch, target branch. Repository, author, title and branch settings are
    regular expressions matched with `re.search`; labels are compared by membership.

    Returns:
        False when any filter matches; True otherwise, including when an exception
        occurs while evaluating the filters (the error is logged).
    """
    def _any_pattern_matches(patterns, text):
        return any(re.search(pattern, text) for pattern in patterns)

    try:
        pr = body.get("pull_request", {})
        pr_title = pr.get("title", "")
        label_objects = pr.get("labels", [])
        head_ref = pr.get("head", {}).get("ref", "")
        base_ref = pr.get("base", {}).get("ref", "")
        author = body.get("sender", {}).get("login")
        repo_name = body.get("repository", {}).get("full_name", "")

        # repositories to ignore
        repo_patterns = get_settings().get("CONFIG.IGNORE_REPOSITORIES", [])
        if repo_patterns and repo_name and _any_pattern_matches(repo_patterns, repo_name):
            get_logger().info(f"Ignoring PR from repository '{repo_name}' due to 'config.ignore_repositories' setting")
            return False

        # users to ignore
        author_patterns = get_settings().get("CONFIG.IGNORE_PR_AUTHORS", [])
        if author_patterns and author and _any_pattern_matches(author_patterns, author):
            get_logger().info(f"Ignoring PR from user '{author}' due to 'config.ignore_pr_authors' setting")
            return False

        # titles to ignore (the setting may be a single pattern or a list of patterns)
        if pr_title:
            title_patterns = get_settings().get("CONFIG.IGNORE_PR_TITLE", [])
            if not isinstance(title_patterns, list):
                title_patterns = [title_patterns]
            if title_patterns and _any_pattern_matches(title_patterns, pr_title):
                get_logger().info(f"Ignoring PR with title '{pr_title}' due to config.ignore_pr_title setting")
                return False

        # labels to ignore
        ignored_labels = get_settings().get("CONFIG.IGNORE_PR_LABELS", [])
        if label_objects and ignored_labels:
            label_names = [item['name'] for item in label_objects]
            if any(name in ignored_labels for name in label_names):
                labels_str = ", ".join(label_names)
                get_logger().info(f"Ignoring PR with labels '{labels_str}' due to config.ignore_pr_labels settings")
                return False

        # source / target branches to ignore
        source_patterns = get_settings().get("CONFIG.IGNORE_PR_SOURCE_BRANCHES", [])
        target_patterns = get_settings().get("CONFIG.IGNORE_PR_TARGET_BRANCHES", [])
        if pr and (source_patterns or target_patterns):
            if _any_pattern_matches(source_patterns, head_ref):
                get_logger().info(
                    f"Ignoring PR with source branch '{head_ref}' due to config.ignore_pr_source_branches settings")
                return False
            if _any_pattern_matches(target_patterns, base_ref):
                get_logger().info(
                    f"Ignoring PR with target branch '{base_ref}' due to config.ignore_pr_target_branches settings")
                return False
    except Exception as e:
        get_logger().error(f"Failed 'should_process_pr_logic': {e}")
    return True


async def handle_request(body: Dict[str, Any], event: str):
    """
    Dispatch an incoming GitHub webhook payload to the matching handler.

    Args:
        body: The request body.
        event: The GitHub event type (e.g. "pull_request", "issue_comment", etc.).

    Returns:
        Always an empty dict.
    """
    # possible actions: "created", "opened", "reopened", "ready_for_review", "review_requested", "synchronize"
    action = body.get("action")
    get_logger().debug(f"Handling request with event: {event}, action: {action}")
    if not action:
        get_logger().debug(f"No action found in request body, exiting handle_request")
        return {}
    agent = PRAgent()
    log_context, sender, sender_id, sender_type = get_log_context(body, event, action, build_number)
    is_check_run = 'check_run' in body
    is_pull_request_event = event == 'pull_request'

    # Filtering: bot senders, and (for anything that is not a new comment) the PR ignore rules.
    # Payloads carrying a 'check_run' bypass both filters.
    if is_bot_user(sender, sender_type) and not is_check_run:
        get_logger().debug(f"Request ignored: bot user detected")
        return {}
    if action != 'created' and not is_check_run and not should_process_pr_logic(body):
        get_logger().debug(f"Request ignored: PR logic filtering")
        return {}

    if is_check_run:
        # failed checks: nothing is done here
        pass
    elif action == 'created':
        # a comment on a PR
        get_logger().debug(f'Request body', artifact=body, event=event)
        await handle_comments_on_pr(body, event, sender, sender_id, action, log_context, agent)
    elif is_pull_request_event and action not in ('synchronize', 'closed'):
        # a new (or reopened / ready) PR
        get_logger().debug(f'Request body', artifact=body, event=event)
        await handle_new_pr_opened(body, event, sender, sender_id, action, log_context, agent)
    elif event == "issue_comment" and 'edited' in action:
        # checkbox clicks: nothing is done here
        pass
    elif is_pull_request_event and action == 'synchronize':
        # "push trigger" for new commits
        await handle_push_trigger_for_new_commits(body, event, sender, sender_id, action, log_context, agent)
    elif is_pull_request_event and action == 'closed':
        if get_settings().get("CONFIG.ANALYTICS_FOLDER", ""):
            handle_closed_pr(body, event, action, log_context)
    else:
        get_logger().info(f"event {event=} action {action=} does not require any handling")
    return {}


def handle_line_comments(body: Dict, comment_body: [str, Any]) -> str:
    """
    Turn an `/ask` review comment made on a code line into an `/ask_line` command.

    The comment's diff hunk is stored in the settings under "ask_diff_hunk". When the
    comment has no `start_line`, the single `line` is used for both ends of the range.

    Returns:
        "" for an empty comment, the `/ask_line ...` command when the comment contains
        '/ask', and the unchanged comment otherwise.
    """
    if not comment_body:
        return ""
    comment = body["comment"]
    line_start = comment["start_line"]
    line_end = comment["line"]
    if not line_start:
        line_start = line_end
    question = comment_body.replace('/ask', '').strip()
    get_settings().set("ask_diff_hunk", comment["diff_hunk"])
    file_name = comment["path"]
    side = comment["side"]
    comment_id = comment["id"]
    if '/ask' not in comment_body:
        return comment_body
    return f"/ask_line --line_start={line_start} --line_end={line_end} --side={side} --file_name={file_name} --comment_id={comment_id} {question}"


def _check_pull_request_event(action: str, body: dict, log_context: dict) -> Tuple[Dict[str, Any], str]:
    invalid_result = {}, ""
    pull_request = body.get("pull_request")
    if not pull_request:
        return invalid_result
    api_url = pull_request.get("url")
    if not api_url:
        return invalid_result
    log_context["api_url"] = api_url
    if pull_request.get("draft", True) or pull_request.get("state") != "open":
        return invalid_result
    if action in ("review_requested", "synchronize") and pull_request.get("created_at") == pull_request.get("updated_at"):
        # avoid double reviews when opening a PR for the first time
        return invalid_result
    return pull_request, api_url


async def _perform_auto_commands_github(commands_conf: str, agent: PRAgent, body: dict, api_url: str,
                                        log_context: dict):
    """
    Run the commands listed under `github_app.<commands_conf>` against the given PR.

    Repo settings are applied first. Nothing is run when auto feedback is disabled (for
    "pr_commands" only), when the PR is filtered out by `should_process_pr_logic`, or when
    no commands are configured. `log_context` is not used inside this function.
    """
    apply_repo_settings(api_url)
    auto_feedback_disabled = commands_conf == "pr_commands" and get_settings().config.disable_auto_feedback
    if auto_feedback_disabled:
        get_logger().info(f"Auto feedback is disabled, skipping auto commands for PR {api_url=}")
        return
    # the configuration already includes the repo settings at this point
    if not should_process_pr_logic(body):
        return {}
    commands = get_settings().get(f"github_app.{commands_conf}")
    if not commands:
        get_logger().info(f"New PR, but no auto commands configured")
        return
    get_settings().set("config.is_auto_command", True)
    for command in commands:
        # first token is the command name; the remaining tokens are its arguments
        command, *args = command.split(" ")
        other_args = update_settings_from_args(args)
        new_command = ' '.join([command] + other_args)
        get_logger().info(f"{commands_conf}. Performing auto command '{new_command}', for {api_url=}")
        await agent.handle_request(api_url, new_command)


@router.get("/")
async def root():
    """Answer GET "/" with a fixed OK status payload."""
    payload = {"status": "ok"}
    return payload


# Module-level setup: adjust the deployment type if configured, then build the FastAPI app.
if get_settings().github_app.override_deployment_type:
    # Override the deployment type to app
    get_settings().set("GITHUB.DEPLOYMENT_TYPE", "app")
# get_settings().set("CONFIG.PUBLISH_OUTPUT_PROGRESS", False)
# The app uses RawContextMiddleware and serves the routes registered on `router`.
middleware = [Middleware(RawContextMiddleware)]
app = FastAPI(middleware=middleware)
app.include_router(router)


def start():
    """Serve `app` with uvicorn on 0.0.0.0; the port comes from the PORT env var (default 3000)."""
    port = int(os.environ.get("PORT", "3000"))
    uvicorn.run(app, host="0.0.0.0", port=port)


# Start the server when this module is executed as a script.
if __name__ == '__main__':
    start()


================================================
FILE: pr_agent/servers/github_lambda_webhook.py
================================================
from fastapi import FastAPI
from mangum import Mangum
from starlette.middleware import Middleware
from starlette_context.middleware import RawContextMiddleware

from pr_agent.servers.github_app import router

# Best-effort: apply the secrets-manager configuration before the app is created.
# Any failure is only logged at debug level, and the module keeps loading.
try:
    from pr_agent.config_loader import apply_secrets_manager_config
    apply_secrets_manager_config()
except Exception as e:
    try:
        from pr_agent.log import get_logger
        get_logger().debug(f"AWS Secrets Manager initialization failed, falling back to environment variables: {e}")
    except Exception:
        # Fail completely silently if log module is not available.
        # `Exception` (not a bare except) so KeyboardInterrupt / SystemExit still propagate.
        pass

middleware = [Middleware(RawContextMiddleware)]
app = FastAPI(middleware=middleware)
app.include_router(router)

# Mangum wraps the app as a callable taking (event, context); lifespan handling is turned off.
handler = Mangum(app, lifespan="off")


def lambda_handler(event, context):
    """Entry point: pass the event and context to the Mangum `handler` and return its result."""
    response = handler(event, context)
    return response

================================================
FILE: pr_agent/servers/github_polling.py
================================================
import asyncio
import multiprocessing
import time
import traceback
from collections import deque
from datetime import datetime, timezone

import aiohttp
import requests

from pr_agent.agent.pr_agent import PRAgent
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import get_git_provider
from pr_agent.log import LoggingFormat, get_logger, setup_logger

# JSON logging; the level is read from CONFIG.LOG_LEVEL and defaults to "DEBUG".
setup_logger(fmt=LoggingFormat.JSON, level=get_settings().get("CONFIG.LOG_LEVEL", "DEBUG"))
# Endpoint that `polling_loop` queries for notifications.
NOTIFICATION_URL = "https://api.github.com/notifications"


async def mark_notification_as_read(headers, notification, session):
    """
    Send a PATCH for the notification's thread; log an error if the response status is not 205.

    Args:
        headers: HTTP headers to send with the request.
        notification: Notification object; only its 'id' is used.
        session: Client session used for the request.
    """
    thread_url = f"https://api.github.com/notifications/threads/{notification['id']}"
    async with session.patch(thread_url, headers=headers) as mark_read_response:
        if mark_read_response.status == 205:
            return
        get_logger().error(
            f"Failed to mark notification as read. Status code: {mark_read_response.status}")


def now() -> str:
    """
    Return the current UTC time as an ISO 8601 string with a trailing "Z".

    Returns:
        str: e.g. "2024-01-01T12:00:00.123456Z" (the "+00:00" offset is replaced by "Z").
    """
    return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

async def async_handle_request(pr_url, rest_of_comment, comment_id, git_provider):
    """
    Run a fresh PRAgent on the comment text and return the agent's result.

    The `notify` callback given to the agent adds an eyes reaction to the comment
    through `git_provider`.
    """
    def _add_eyes():
        return git_provider.add_eyes_reaction(comment_id)

    return await PRAgent().handle_request(pr_url, rest_of_comment, notify=_add_eyes)

def run_handle_request(pr_url, rest_of_comment, comment_id, git_provider):
    """Synchronous wrapper: run `async_handle_request` to completion with `asyncio.run`."""
    request_coro = async_handle_request(pr_url, rest_of_comment, comment_id, git_provider)
    return asyncio.run(request_coro)


def process_comment_sync(pr_url, rest_of_comment, comment_id):
    """
    Process one comment synchronously (this is the function `polling_loop` queues as a
    process target). Exceptions are logged with a traceback and never propagate.
    """
    try:
        provider = get_git_provider()(pr_url=pr_url)
        # the async handler is driven by run_handle_request
        run_handle_request(pr_url, rest_of_comment, comment_id, provider)
    except Exception as e:
        get_logger().error(f"Error processing comment: {e}", artifact={"traceback": traceback.format_exc()})


async def process_comment(pr_url, rest_of_comment, comment_id):
    """
    Async variant of comment processing: bind a git provider to the PR, run a PRAgent on the
    comment text, and log completion. Exceptions are logged with a traceback and swallowed.
    """
    try:
        provider = get_git_provider()(pr_url=pr_url)
        provider.set_pr(pr_url)

        def _add_eyes():
            return provider.add_eyes_reaction(comment_id)

        await PRAgent().handle_request(pr_url, rest_of_comment, notify=_add_eyes)
        get_logger().info(f"Finished processing comment for PR: {pr_url}")
    except Exception as e:
        get_logger().error(f"Error processing comment: {e}", artifact={"traceback": traceback.format_exc()})

async def is_valid_notification(notification, headers, handled_ids, session, user_id):
    """
    Check whether a notification is a PR mention that contains a comment tagging `user_id`.

    Args:
        notification: A notification object from the notifications endpoint.
        headers: HTTP headers used for the follow-up requests.
        handled_ids: Set of already handled ids; the latest comment's id is added to it.
        session: Client session used to fetch the latest comment.
        user_id: Login of the polling user; "@<user_id>" is the tag searched for.

    Returns:
        NOTE: the tuple length depends on the outcome.
        - `(False, handled_ids)` when the notification should be skipped (also on any exception).
        - `(True, handled_ids, comment, comment_body, pr_url, user_tag)` when a comment
          containing the tag was found.
    """
    try:
        if 'reason' in notification and notification['reason'] == 'mention':
            if 'subject' in notification and notification['subject']['type'] == 'PullRequest':
                pr_url = notification['subject']['url']
                latest_comment = notification['subject']['latest_comment_url']
                if not latest_comment or not isinstance(latest_comment, str):
                    get_logger().debug(f"no latest_comment")
                    return False, handled_ids
                async with session.get(latest_comment, headers=headers) as comment_response:
                    # Set to True whenever the latest comment cannot be used as-is
                    # (written by the polling user, empty body, or no tag in it).
                    check_prev_comments = False
                    user_tag = "@" + user_id
                    if comment_response.status == 200:
                        comment = await comment_response.json()
                        if 'id' in comment:
                            # de-duplicate: each comment id is handled at most once
                            if comment['id'] in handled_ids:
                                get_logger().debug(f"comment['id'] in handled_ids")
                                return False, handled_ids
                            else:
                                handled_ids.add(comment['id'])
                        if 'user' in comment and 'login' in comment['user']:
                            if comment['user']['login'] == user_id:
                                get_logger().debug(f"comment['user']['login'] == user_id")
                                check_prev_comments = True
                        comment_body = comment.get('body', '')
                        if not comment_body:
                            get_logger().debug(f"no comment_body")
                            check_prev_comments = True
                        else:
                            if user_tag not in comment_body:
                                get_logger().debug(f"user_tag not in comment_body")
                                check_prev_comments = True
                            else:
                                get_logger().info(f"Polling, pr_url: {pr_url}",
                                                  artifact={"comment": comment_body})

                        if not check_prev_comments:
                            return True, handled_ids, comment, comment_body, pr_url, user_tag
                        else: # we could not find the user tag in the latest comment. Check previous comments
                            # get all comments in the PR
                            # NOTE(review): `requests.get` is a blocking call inside this coroutine,
                            # and it is issued without a timeout.
                            requests_url = f"{pr_url}/comments".replace("pulls", "issues")
                            comments_response = requests.get(requests_url, headers=headers)
                            # reversed response order; presumably newest first - confirm against the API ordering
                            comments = comments_response.json()[::-1]
                            max_comment_to_scan = 4
                            for comment in comments[:max_comment_to_scan]:
                                # skip comments written by the polling user and comments without a body
                                if 'user' in comment and 'login' in comment['user']:
                                    if comment['user']['login'] == user_id:
                                        continue
                                comment_body = comment.get('body', '')
                                if not comment_body:
                                    continue
                                if user_tag in comment_body:
                                    get_logger().info("found user tag in previous comments")
                                    get_logger().info(f"Polling, pr_url: {pr_url}",
                                                      artifact={"comment": comment_body})
                                    return True, handled_ids, comment, comment_body, pr_url, user_tag

                            # Reached when none of the scanned comments contains the tag
                            # (the message below says "Failed to fetch", but the fetch itself succeeded).
                            get_logger().warning(f"Failed to fetch comments for PR: {pr_url}",
                                                    artifact={"comments": comments})
                            return False, handled_ids

        # not a PR mention, or the latest comment could not be fetched (status != 200)
        return False, handled_ids
    except Exception as e:
        get_logger().exception(f"Error processing polling notification",
                               artifact={"notification": notification, "error": e})
        return False, handled_ids


async def polling_loop():
    """
    Poll the notifications endpoint forever and dispatch comments that tag the polling user.

    Every 5 seconds the notifications are fetched (using `since` for the first request and
    `If-Modified-Since` once the server has returned a `Last-Modified` header). Each
    notification is marked as read, validated with `is_valid_notification`, and, when valid,
    the text following the user tag is queued for `process_comment_sync`. Queued tasks are
    started as separate processes that are not joined; at most `max_allowed_parallel_tasks`
    processes are started per polling response, and any excess tasks are dropped with an
    error log.

    Raises:
        ValueError: if `github.deployment_type` is not 'user' or no user token is configured.
    """
    handled_ids = set()
    since = [now()]
    last_modified = [None]
    git_provider = get_git_provider()()
    user_id = git_provider.get_user_id()
    get_settings().set("CONFIG.PUBLISH_OUTPUT_PROGRESS", False)
    get_settings().set("pr_description.publish_description_as_comment", True)

    try:
        deployment_type = get_settings().github.deployment_type
        token = get_settings().github.user_token
    except AttributeError:
        deployment_type = 'none'
        token = None

    if deployment_type != 'user':
        raise ValueError("Deployment mode must be set to 'user' to get notifications")
    if not token:
        raise ValueError("User token must be set to get notifications")

    # Upper bound on the number of worker processes started from one polling response.
    max_allowed_parallel_tasks = 10

    async with aiohttp.ClientSession() as session:
        while True:
            try:
                await asyncio.sleep(5)
                headers = {
                    "Accept": "application/vnd.github.v3+json",
                    "Authorization": f"Bearer {token}"
                }
                params = {
                    "participating": "true"
                }
                if since[0]:
                    params["since"] = since[0]
                if last_modified[0]:
                    headers["If-Modified-Since"] = last_modified[0]

                async with session.get(NOTIFICATION_URL, headers=headers, params=params) as response:
                    if response.status == 200:
                        if 'Last-Modified' in response.headers:
                            # from now on rely on If-Modified-Since instead of the `since` parameter
                            last_modified[0] = response.headers['Last-Modified']
                            since[0] = None
                        notifications = await response.json()
                        if not notifications:
                            continue
                        get_logger().info(f"Received {len(notifications)} notifications")
                        task_queue = deque()
                        for notification in notifications:
                            if not notification:
                                continue
                            # mark notification as read
                            await mark_notification_as_read(headers, notification, session)

                            handled_ids.add(notification['id'])
                            output = await is_valid_notification(notification, headers, handled_ids, session, user_id)
                            if output[0]:
                                _, handled_ids, comment, comment_body, pr_url, user_tag = output
                                # the command is whatever follows the first occurrence of the user tag
                                rest_of_comment = comment_body.split(user_tag)[1].strip()
                                comment_id = comment['id']

                                # Add to the task queue
                                get_logger().info(
                                    f"Adding comment processing to task queue for PR, {pr_url}, comment_body: {comment_body}")
                                task_queue.append((process_comment_sync, (pr_url, rest_of_comment, comment_id)))
                                get_logger().info(f"Queued comment processing for PR: {pr_url}")
                            else:
                                get_logger().debug(f"Skipping comment processing for PR")

                        if task_queue:
                            # Enforce the cap BEFORE starting anything, so that exactly
                            # `max_allowed_parallel_tasks` processes are started at most and the
                            # reported number of dropped tasks is accurate.
                            dropped_tasks = len(task_queue) - max_allowed_parallel_tasks
                            if dropped_tasks > 0:
                                get_logger().error(f"Dropping {dropped_tasks} tasks from polling session")
                            processes = []
                            for func, args in list(task_queue)[:max_allowed_parallel_tasks]:  # Create parallel tasks
                                p = multiprocessing.Process(target=func, args=args)
                                processes.append(p)
                                p.start()
                            task_queue.clear()

                            # Dont wait for all processes to complete. Move on to the next iteration
                            # for p in processes:
                            #     p.join()

                    elif response.status != 304:
                        # 304 means "nothing new"; any other status is reported through the logger
                        get_logger().error(f"Failed to fetch notifications. Status code: {response.status}")

            except Exception as e:
                get_logger().error(f"Polling exception during processing of a notification: {e}",
                                   artifact={"traceback": traceback.format_exc()})


# Run the polling loop when this module is executed as a script.
if __name__ == '__main__':
    asyncio.run(polling_loop())


================================================
FILE: pr_agent/servers/gitlab_lambda_webhook.py
================================================
from fastapi import FastAPI
from mangum import Mangum
from starlette.middleware import Middleware
from starlette_context.middleware import RawContextMiddleware

from pr_agent.servers.gitlab_webhook import router

# Best-effort: apply the secrets-manager configuration before the app is created.
# Any failure is only logged at debug level, and the module keeps loading.
try:
    from pr_agent.config_loader import apply_secrets_manager_config
    apply_secrets_manager_config()
except Exception as e:
    try:
        from pr_agent.log import get_logger
        get_logger().debug(f"AWS Secrets Manager initialization failed, falling back to environment variables: {e}")
    except Exception:
        # Fail completely silently if log module is not available.
        # `Exception` (not a bare except) so KeyboardInterrupt / SystemExit still propagate.
        pass

middleware = [Middleware(RawContextMiddleware)]
app = FastAPI(middleware=middleware)
app.include_router(router)

# Mangum wraps the app as a callable taking (event, context); lifespan handling is turned off.
handler = Mangum(app, lifespan="off")


def lambda_handler(event, context):
    """Entry point: pass the event and context to the Mangum `handler` and return its result."""
    response = handler(event, context)
    return response

================================================
FILE: pr_agent/servers/gitlab_webhook.py
================================================
import copy
import json
import os
import re
from datetime import datetime

import uvicorn
from fastapi import APIRouter, FastAPI, Request, status
from fastapi.encoders import jsonable_encoder
from fastapi.responses import JSONResponse
from starlette.background import BackgroundTasks
from starlette.middleware import Middleware
from starlette_context import context
from starlette_context.middleware import RawContextMiddleware

from pr_agent.agent.pr_agent import PRAgent
from pr_agent.algo.utils import update_settings_from_args
from pr_agent.config_loader import get_settings, global_settings
from pr_agent.git_providers.utils import apply_repo_settings
from pr_agent.log import LoggingFormat, get_logger, setup_logger
from pr_agent.secret_providers import get_secret_provider
from pr_agent.git_providers import get_git_provider_with_context

# JSON logging; the level is read from CONFIG.LOG_LEVEL and defaults to "DEBUG".
setup_logger(fmt=LoggingFormat.JSON, level=get_settings().get("CONFIG.LOG_LEVEL", "DEBUG"))
router = APIRouter()

# A secret provider is created only when CONFIG.SECRET_PROVIDER is set; otherwise this is None.
secret_provider = get_secret_provider() if get_settings().get("CONFIG.SECRET_PROVIDER") else None


async def handle_request(api_url: str, body: str, log_context: dict, sender_id: str, notify=None):
    """
    Fill in the logging context for a command and run it through a new PRAgent.

    Args:
        api_url: URL of the merge request the command applies to.
        body: The command text; "/review" is logged as a "pull_request" event, anything else as "comment".
        log_context: Logging context, updated in place with action, event, api_url and app_name.
        sender_id: Not used inside this function.
        notify: Optional callback forwarded to the agent.
    """
    if body == "/review":
        event_name = "pull_request"
    else:
        event_name = "comment"
    log_context["action"] = body
    log_context["event"] = event_name
    log_context["api_url"] = api_url
    log_context["app_name"] = get_settings().get("CONFIG.APP_NAME", "Unknown")

    with get_logger().contextualize(**log_context):
        agent = PRAgent()
        await agent.handle_request(api_url, body, notify)

async def _perform_commands_gitlab(commands_conf: str, agent: PRAgent, api_url: str,
                                   log_context: dict, data: dict):
    """
    Run the commands listed under `gitlab.<commands_conf>` against the given merge request.

    Repo settings are applied first. Nothing is run when auto feedback is disabled (for
    "pr_commands" only) or when `should_process_pr_logic(data)` rejects the payload.
    A failure in one command is logged and does not stop the remaining commands.
    """
    apply_repo_settings(api_url)
    auto_feedback_disabled = commands_conf == "pr_commands" and get_settings().config.disable_auto_feedback
    if auto_feedback_disabled:
        get_logger().info(f"Auto feedback is disabled, skipping auto commands for PR {api_url=}", **log_context)
        return
    # the configuration already includes the repo settings at this point
    if not should_process_pr_logic(data):
        return
    commands = get_settings().get(f"gitlab.{commands_conf}", {})
    get_settings().set("config.is_auto_command", True)
    for command in commands:
        try:
            # first token is the command name; the remaining tokens are its arguments
            command, *args = command.split(" ")
            other_args = update_settings_from_args(args)
            new_command = ' '.join([command] + other_args)
            get_logger().info(f"Performing command: {new_command}")
            with get_logger().contextualize(**log_context):
                await agent.handle_request(api_url, new_command)
        except Exception as e:
            get_logger().error(f"Failed to perform command {command}: {e}")


def is_bot_user(data) -> bool:
    """
    Heuristically decide whether the webhook sender is a bot.

    GitLab payloads carry no explicit bot flag (unlike GitHub), so the sender's
    lower-cased display name is checked for a few well-known substrings.

    Args:
        data: The webhook payload; ``data["user"]["name"]`` is inspected.

    Returns:
        True when the name contains a bot indicator, False otherwise (including on errors).
    """
    try:
        user_info = data.get("user", {})
        sender_name = user_info.get("name", "unknown").lower()
        for indicator in ('codium', 'bot_', 'bot-', '_bot', '-bot'):
            if indicator in sender_name:
                get_logger().info(f"Skipping GitLab bot user: {sender_name}")
                return True
    except Exception as e:
        get_logger().error(f"Failed 'is_bot_user' logic: {e}")
    return False

def is_draft(data) -> bool:
    """
    Tell whether the merge request described by a webhook payload is a draft.

    The explicit ``object_attributes.draft`` field is used when present. Otherwise
    (GitLab server versions before 16) the title is searched for the ``Draft:`` marker.

    Args:
        data: The webhook payload.

    Returns:
        True when the merge request is a draft, False otherwise (including when the
        payload carries neither a ``draft`` flag nor a title, and on unexpected errors).
    """
    try:
        # `or {}` also covers an explicit null value for object_attributes.
        attributes = data.get('object_attributes') or {}
        if 'draft' in attributes:
            draft_flag = attributes['draft']
            # Accept string flags the same way is_draft_ready does, so 'false' is not treated as truthy.
            if isinstance(draft_flag, str):
                return draft_flag.lower() == 'true'
            return bool(draft_flag)

        # for gitlab server version before 16
        title = attributes.get('title')
        # A missing/None title is simply "not a draft" rather than an error.
        if isinstance(title, str) and 'Draft:' in title:
            return True
    except Exception as e:
        get_logger().error(f"Failed 'is_draft' logic: {e}")
    return False

def is_draft_ready(data) -> bool:
    """
    Detect a draft -> ready transition in a merge request update payload.

    Looks at ``data["changes"]``: either the ``draft`` flag went from true to false
    (booleans or their string forms), or - for GitLab server versions before 16 -
    the ``Draft:`` marker was present in the previous title and is absent from the current one.

    Args:
        data: The webhook payload.

    Returns:
        True when the update marks a draft as ready, False otherwise (including on errors).
    """
    try:
        changes = data.get('changes', {})
        if 'draft' in changes:
            draft_change = changes['draft']
            # Normalise string values ('true'/'false', any case) to booleans; leave other types untouched.
            was_draft, now_draft = (
                value.lower() == 'true' if isinstance(value, str) else value
                for value in (draft_change['previous'], draft_change['current'])
            )
            return was_draft is True and now_draft is False

        # for gitlab server version before 16
        if 'title' in changes:
            title_change = changes['title']
            return 'Draft:' in title_change['previous'] and 'Draft:' not in title_change['current']
    except Exception as e:
        get_logger().error(f"Failed 'is_draft_ready' logic: {e}")
    return False

def should_process_pr_logic(data) -> bool:
    """
    Apply the configured ignore filters to a merge request webhook payload.

    Filters are evaluated in this order: repository, author, source branch,
    target branch, labels, title. Repository, author, branch and title filters are
    regular expressions matched with ``re.search``; labels are compared for equality.

    Args:
        data: The webhook payload.

    Returns:
        False when the payload has no ``object_attributes`` or any filter matches;
        True otherwise. If evaluating the filters raises, the error is logged and
        True is returned.
    """
    def _any_pattern_matches(patterns, text) -> bool:
        # True as soon as one configured regex is found in the text.
        return any(re.search(pattern, text) for pattern in patterns)

    try:
        if not data.get('object_attributes', {}):
            return False
        attributes = data['object_attributes']
        title = attributes.get('title')
        sender = data.get("user", {}).get("username", "")
        repo_full_name = data.get('project', {}).get('path_with_namespace', "")

        # logic to ignore PRs from specific repositories
        ignore_repos = get_settings().get("CONFIG.IGNORE_REPOSITORIES", [])
        if ignore_repos and repo_full_name and _any_pattern_matches(ignore_repos, repo_full_name):
            get_logger().info(f"Ignoring MR from repository '{repo_full_name}' due to 'config.ignore_repositories' setting")
            return False

        # logic to ignore PRs from specific users
        ignore_pr_users = get_settings().get("CONFIG.IGNORE_PR_AUTHORS", [])
        if ignore_pr_users and sender and _any_pattern_matches(ignore_pr_users, sender):
            get_logger().info(f"Ignoring PR from user '{sender}' due to 'config.ignore_pr_authors' settings")
            return False

        # logic to ignore MRs for titles, labels and source, target branches.
        ignore_mr_title = get_settings().get("CONFIG.IGNORE_PR_TITLE", [])
        ignore_mr_labels = get_settings().get("CONFIG.IGNORE_PR_LABELS", [])
        ignore_mr_source_branches = get_settings().get("CONFIG.IGNORE_PR_SOURCE_BRANCHES", [])
        ignore_mr_target_branches = get_settings().get("CONFIG.IGNORE_PR_TARGET_BRANCHES", [])

        if ignore_mr_source_branches:
            source_branch = attributes.get('source_branch')
            if _any_pattern_matches(ignore_mr_source_branches, source_branch):
                get_logger().info(
                    f"Ignoring MR with source branch '{source_branch}' due to gitlab.ignore_mr_source_branches settings")
                return False

        if ignore_mr_target_branches:
            target_branch = attributes.get('target_branch')
            if _any_pattern_matches(ignore_mr_target_branches, target_branch):
                get_logger().info(
                    f"Ignoring MR with target branch '{target_branch}' due to gitlab.ignore_mr_target_branches settings")
                return False

        if ignore_mr_labels:
            # Labels are matched by exact title, not by regex.
            labels = [label['title'] for label in attributes.get('labels', [])]
            if any(label in ignore_mr_labels for label in labels):
                labels_str = ", ".join(labels)
                get_logger().info(f"Ignoring MR with labels '{labels_str}' due to gitlab.ignore_mr_labels settings")
                return False

        if ignore_mr_title and _any_pattern_matches(ignore_mr_title, title):
            get_logger().info(f"Ignoring MR with title '{title}' due to gitlab.ignore_mr_title settings")
            return False
    except Exception as e:
        get_logger().error(f"Failed 'should_process_pr_logic': {e}")
    return True


@router.post("/webhook")
async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request):
    """
    Entry point for GitLab webhooks.

    The request body is parsed, a deep copy of the global settings is stored in the
    request context, and the real work (`inner`) is scheduled via
    ``background_tasks.add_task``. This handler itself always answers 200 "success";
    it does not wait for `inner`.

    NOTE(review): `inner` returns JSONResponse objects (e.g. 401 on failed
    authentication), but since it is only scheduled as a background task those
    responses are presumably never sent to the caller - confirm.
    """
    start_time = datetime.now()
    request_json = await request.json()
    # Work on a per-request copy so token overrides below do not modify `global_settings`.
    context["settings"] = copy.deepcopy(global_settings)

    async def inner(data: dict):
        """Authenticate the webhook, then dispatch on the payload's ``object_kind``."""
        log_context = {"server_type": "gitlab_app"}
        get_logger().debug("Received a GitLab webhook")
        # --- Authentication ---
        # Mode 1: a secret provider is configured; the X-Gitlab-Token header is used as the lookup key
        # for a JSON secret that must contain "gitlab_token".
        if request.headers.get("X-Gitlab-Token") and secret_provider:
            request_token = request.headers.get("X-Gitlab-Token")
            secret = secret_provider.get_secret(request_token)
            if not secret:
                # NOTE(review): the raw request token is written to the log here and in the error below.
                get_logger().warning(f"Empty secret retrieved, request_token: {request_token}")
                return JSONResponse(status_code=status.HTTP_401_UNAUTHORIZED,
                                    content=jsonable_encoder({"message": "unauthorized"}))
            try:
                secret_dict = json.loads(secret)
                gitlab_token = secret_dict["gitlab_token"]
                log_context["token_id"] = secret_dict.get("token_name", secret_dict.get("id", "unknown"))
                # Use the token from the secret as the personal access token for this request's settings.
                context["settings"].gitlab.personal_access_token = gitlab_token
            except Exception as e:
                get_logger().error(f"Failed to validate secret {request_token}: {e}")
                return JSONResponse(status_code=status.HTTP_401_UNAUTHORIZED, content=jsonable_encoder({"message": "unauthorized"}))
        # Mode 2: a single shared secret that must equal the X-Gitlab-Token header.
        elif get_settings().get("GITLAB.SHARED_SECRET"):
            secret = get_settings().get("GITLAB.SHARED_SECRET")
            if not request.headers.get("X-Gitlab-Token") == secret:
                get_logger().error("Failed to validate secret")
                return JSONResponse(status_code=status.HTTP_401_UNAUTHORIZED, content=jsonable_encoder({"message": "unauthorized"}))
        # Neither mode is available: reject.
        else:
            get_logger().error("Failed to validate secret")
            return JSONResponse(status_code=status.HTTP_401_UNAUTHORIZED, content=jsonable_encoder({"message": "unauthorized"}))
        # Whatever the auth mode, a GitLab personal access token must be configured at this point.
        gitlab_token = get_settings().get("GITLAB.PERSONAL_ACCESS_TOKEN", None)
        if not gitlab_token:
            get_logger().error("No gitlab token found")
            return JSONResponse(status_code=status.HTTP_401_UNAUTHORIZED, content=jsonable_encoder({"message": "unauthorized"}))

        get_logger().info("GitLab data", artifact=data)
        sender = data.get("user", {}).get("username", "unknown")
        sender_id = data.get("user", {}).get("id", "unknown")

        # ignore bot users
        if is_bot_user(data):
            return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"}))

        log_context["sender"] = sender
        # --- Merge request events ---
        if data.get('object_kind') == 'merge_request':
            # ignore MRs based on title, labels, source and target branches
            if not should_process_pr_logic(data):
                return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"}))
            object_attributes = data.get('object_attributes', {})
            # Newly opened or reopened MR: run the configured pr_commands unless it is a draft.
            if object_attributes.get('action') in ['open', 'reopen']:
                url = object_attributes.get('url')
                get_logger().info(f"New merge request: {url}")
                if is_draft(data):
                    get_logger().info(f"Skipping draft MR: {url}")
                    return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"}))

                await _perform_commands_gitlab("pr_commands", PRAgent(), url, log_context, data)

            # for push event triggered merge requests
            # (an 'update' action carrying 'oldrev' is treated as a push to the MR)
            elif object_attributes.get('action') == 'update' and object_attributes.get('oldrev'):
                url = object_attributes.get('url')
                get_logger().info(f"New merge request: {url}")
                if is_draft(data):
                    get_logger().info(f"Skipping draft MR: {url}")
                    return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"}))

                # Apply repo settings before checking push commands or handle_push_trigger
                apply_repo_settings(url)

                commands_on_push = get_settings().get(f"gitlab.push_commands", {})
                handle_push_trigger = get_settings().get(f"gitlab.handle_push_trigger", False)
                # Both a non-empty command list and the trigger flag are required.
                if not commands_on_push or not handle_push_trigger:
                    get_logger().info("Push event, but no push commands found or push trigger is disabled")
                    return JSONResponse(status_code=status.HTTP_200_OK,
                                        content=jsonable_encoder({"message": "success"}))

                get_logger().debug(f'A push event has been received: {url}')
                await _perform_commands_gitlab("push_commands", PRAgent(), url, log_context, data)
                
            # for draft to ready triggered merge requests
            elif object_attributes.get('action') == 'update' and is_draft_ready(data):
                url = object_attributes.get('url')
                get_logger().info(f"Draft MR is ready: {url}")

                # same as open MR
                await _perform_commands_gitlab("pr_commands", PRAgent(), url, log_context, data)

        # --- Comment events ---
        elif data.get('object_kind') == 'note' and data.get('event_type') == 'note': # comment on MR
            # Only notes attached to a merge request are handled.
            if 'merge_request' in data:
                mr = data['merge_request']
                url = mr.get('url')
                comment_id = data.get('object_attributes', {}).get('id')
                provider = get_git_provider_with_context(pr_url=url)

                get_logger().info(f"A comment has been added to a merge request: {url}")
                body = data.get('object_attributes', {}).get('note')
                # A "/ask" written as a diff (inline) comment is rewritten into an /ask_line command.
                # NOTE(review): `'/ask' in body` would raise if the note text is missing (None) - confirm payloads always carry it.
                if data.get('object_attributes', {}).get('type') == 'DiffNote' and '/ask' in body: # /ask_line
                    body = handle_ask_line(body, data)

                # `notify` is passed on to PRAgent.handle_request; when called it adds an "eyes" reaction to the comment.
                await handle_request(url, body, log_context, sender_id, notify=lambda: provider.add_eyes_reaction(comment_id))

    background_tasks.add_task(inner, request_json)
    end_time = datetime.now()
    # This measures only parsing + scheduling in this handler, not the execution of `inner`.
    get_logger().info(f"Processing time: {end_time - start_time}", request=request_json)
    return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"}))


def handle_ask_line(body, data):
    """
    Turn an inline ``/ask`` diff comment into an ``/ask_line`` command.

    The line range, file path and discussion id are taken from
    ``data["object_attributes"]``; only the new-side line numbers are used and the
    side is always reported as ``RIGHT``.

    Args:
        body: The original comment text containing ``/ask``.
        data: The webhook payload of the note event.

    Returns:
        The rewritten ``/ask_line ...`` command, or the original ``body`` unchanged
        if the payload cannot be interpreted (the failure is logged).
    """
    try:
        note_attributes = data['object_attributes']
        position = note_attributes['position']
        line_range = position['line_range']
        # Old-side ('LEFT') line numbers are intentionally not handled here.
        first_line = line_range['start']['new_line']
        last_line = line_range['end']['new_line']
        question = body.replace('/ask', '').strip()
        file_path = position['new_path']
        side = 'RIGHT'
        discussion_id = note_attributes["discussion_id"]
        get_logger().info("Handling line ")
        body = f"/ask_line --line_start={first_line} --line_end={last_line} --side={side} --file_name={file_path} --comment_id={discussion_id} {question}"
    except Exception as e:
        get_logger().error(f"Failed to handle ask line comment: {e}")
    return body


@router.get("/")
async def root():
    """Return a fixed ``{"status": "ok"}`` payload for GET requests to the root path."""
    response_payload = {"status": "ok"}
    return response_payload

# Module-level application wiring, executed at import time.
# GITLAB.URL is mandatory: importing this module fails with ValueError when it is not configured.
gitlab_url = get_settings().get("GITLAB.URL", None)
if not gitlab_url:
    raise ValueError("GITLAB.URL is not set")
# This server always uses GitLab as its git provider.
get_settings().config.git_provider = "gitlab"
# Build the FastAPI app with the raw-context middleware and attach the router defined in this module.
middleware = [Middleware(RawContextMiddleware)]
app = FastAPI(middleware=middleware)
app.include_router(router)


def start():
    """
    Launch the GitLab webhook server with uvicorn on host 0.0.0.0.

    The listening port is read from the PORT environment variable. When PORT is
    unset or empty, not an integer, or outside 1-65535, port 3000 is used instead
    (a warning is logged for invalid values).
    """
    default_port = 3000
    env_port = os.environ.get("PORT")
    try:
        if env_port:
            port = int(env_port)
        else:
            port = default_port
        if port < 1 or port > 65535:
            raise ValueError(f"Port {port} is out of valid range")
        if env_port:
            get_logger().info(f"Using custom PORT from environment: {port}")
    except ValueError as e:
        # Covers both a non-numeric value (raised by int) and the explicit range check above.
        get_logger().warning(f"Invalid PORT environment variable ({e}), using default port 3000")
        port = default_port
    uvicorn.run(app, host="0.0.0.0", port=port)


# Start the server only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    start()


================================================
FILE: pr_agent/servers/gunicorn_config.py
================================================
import multiprocessing
import os

# from prometheus_client import multiprocess

# Sample Gunicorn configuration file.

#
# Server socket
#
#   bind - The socket to bind.
#
#       A string of the form: 'HOST', 'HOST:PORT', 'unix:PATH'.
#       An IP is a valid HOST.
#
#   backlog - The number of pending connections. This refers
#       to the number of clients that can be waiting to be
#       served. Exceeding this number results in the client
#       getting an error when attempting to connect. It should
#       only affect servers under significant load.
#
#       Must be a positive integer. Generally set in the 64-2048
#       range.
#

# bind = '0.0.0.0:5000'
# Listen on all interfaces (0.0.0.0), port 3000.
bind = '0.0.0.0:3000'
# Maximum number of pending connections (see the "backlog" description above).
backlog = 2048

#
# Worker processes
#
#   workers - The number of worker processes that this server
#       should keep alive for handling requests.
#
#       A positive integer generally in the 2-4 x $(NUM_CORES)
#       range. You'll want to vary this a bit to find the best
#       for your particular application's work load.
#
#   worker_class - The type of workers to use. The default
#       sync class should handle most 'normal' types of work
#       loads. You'll want to read
#       http://docs.gunicorn.org/en/latest/design.html#choosing-a-worker-type
#       for information on when you might want to choose one
#       of the other worker classes.
#
#       A string referring to a Python path to a subclass of
#       gunicorn.workers.base.Worker. The default provided values
#       can be seen at
#       http://docs.gunicorn.org/en/latest/settings.html#worker-class
#
#   worker_connections - For the eventlet and gevent worker classes
#       this limits the maximum number of simultaneous clients that
#       a single process can handle.
#
#       A positive integer generally set to around 1000.
#
#   timeout - If a worker does not notify the master process in this
#       number of seconds it is killed and a new worker is spawned
#       to replace it.
#
#       Generally set to thirty seconds. Only set this noticeably
#       higher if you're sure of the repercussions for sync workers.
#       For the non sync workers it just means that the worker
#       process is still communicating and is not tied to the length
#       of time required to handle a single request.
#
#   keepalive - The number of seconds to wait for the next request
#       on a Keep-Alive HTTP connection.
#
#       A positive integer. Generally set in the 1-5 seconds range.
#

# Worker count: an explicit GUNICORN_WORKERS environment variable wins;
# otherwise fall back to 2 * cores + 1.
_workers_from_env = os.getenv('GUNICORN_WORKERS', None)
if not _workers_from_env:
    cores = multiprocessing.cpu_count()
    workers = 2 * cores + 1
else:
    workers = int(_workers_from_env)
worker_connections = 1000  # per-process client limit (eventlet/gevent worker classes)
timeout = 240  # seconds a worker may stay silent before it is killed and replaced
keepalive = 2  # seconds to wait for the next request on a Keep-Alive connection

#
#   spew - Install a trace function that spews every line of Python
#       that is executed when running the server. This is the
#       nuclear option.
#
#       True or False
#

spew = False  # line-by-line execution tracing stays disabled

#
# Server mechanics
#
#   daemon - Detach the main Gunicorn process from the controlling
#       terminal with a standard fork/fork sequence.
#
#       True or False
#
#   raw_env - Pass environment variables to the execution environment.
#
#   pidfile - The path to a pid file to write
#
#       A path string or None to not write a pid file.
#
#   user - Switch worker processes to run as this user.
#
#       A valid user id (as an integer) or the name of a user that
#       can be retrieved with a call to pwd.getpwnam(value) or None
#       to not change the worker process user.
#
#   group - Switch worker process to run as this group.
#
#       A valid group id (as an integer) or the name of a group that
#       can be retrieved with a call to grp.getgrnam(value) or None
#       to not change the worker processes group.
#
#   umask - A mask for file permissions written by Gunicorn. Note that
#       this affects unix socket permissions.
#
#       A valid value for the os.umask(mode) call or a string
#       compatible with int(value, 0) (0 means Python guesses
#       the base, so values like "0", "0xFF", "0022" are valid
#       for decimal, hex, and octal representations)
#
#   tmp_upload_dir - A directory to store temporary request data when
#       requests are read. This will most likely be disappearing soon.
#
#       A path to a directory where the process owner can write. Or
#       None to signal that Python should choose one on its own.
#

daemon = False  # do not detach from the controlling terminal
raw_env = []  # no extra environment variables are passed to the execution environment
pidfile = None  # do not write a pid file
umask = 0  # file-permission mask for files written by Gunicorn
user = None  # do not change the worker process user
group = None  # do not change the worker process group
tmp_upload_dir = None  # let Python choose the temporary directory

#
#   Logging
#
#   logfile - The path to a log file to write to.
#
#       A path string. "-" means log to stdout.
#
#   loglevel - The granularity of log output
#
#       A string of "debug", "info", "warning", "error", "critical"
#

errorlog = '-'  # "-" means log to stdout (see the logfile description above)
loglevel = 'info'
accesslog = None  # no access log path is configured
# Format string used for access-log lines.
access_log_format = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"'

#
# Process naming
#
#   proc_name - A base to use with setproctitle to change the way
#       that Gunicorn processes are reported in the system process
#       table. This affects things like 'ps' and 'top'. If you're
#       going to be running more than one instance of Gunicorn you'll
#       probably want to set a name to tell them apart. This requires
#       that you install the setproctitle module.
#
#       A string or None to choose a default of something like 'gunicorn'.
#

proc_name = None  # None selects the default process name


#
# Server hooks
#
#   post_fork - Called just after a worker has been forked.
#
#       A callable that takes a server and worker instance
#       as arguments.
#
#   pre_fork - Called just prior to forking the worker subprocess.
#
#       A callable that accepts the same arguments as post_fork
#
#   pre_exec - Called just prior to forking off a secondary
#       master process during things like config reloading.
#
#       A callable that takes a server instance as the sole argument.
#


================================================
FILE: pr_agent/servers/help.py
================================================
class HelpMessage:
    """Static builders for the markdown help / usage-guide texts of the PR-Agent tools."""

    @staticmethod
    def get_general_commands_text():
        """Return the quoted markdown bullet list of the general PR-Agent commands."""
        commands_text = (
            "> - **/review**: Request a review of your Pull Request.   \n"
            "> - **/describe**: Update the PR title and description based on the contents of the PR.   \n"
            "> - **/improve [--extended]**: Suggest code improvements. Extended mode provides a higher quality feedback.   \n"
            "> - **/ask \\<QUESTION\\>**: Ask a question about the PR.   \n"
            "> - **/update_changelog**: Update the changelog based on the PR's contents.   \n"
            "> - **/help_docs \\<QUESTION\\>**: Given a path to documentation (either for this repository or for a given one), ask a question.   \n"
            "> - **/add_docs**: Generate docstring for new components introduced in the PR.   \n"
            "> - **/generate_labels**: Generate labels for the PR based on the PR's contents.   \n\n"
            ">See the [tools guide](https://pr-agent-docs.codium.ai/tools/) for more details.\n"
            ">To list the possible configuration parameters, add a **/config** comment.   \n"
        )
        return commands_text

    @staticmethod
    def get_general_bot_help_text():
        """Return the intro line followed by the general commands list."""
        output = f"> To invoke the PR-Agent, add a comment using one of the following commands:  \n{HelpMessage.get_general_commands_text()} \n"
        return output

    @staticmethod
    def get_review_usage_guide():
        """Return the markdown usage guide for the `review` tool."""
        output = "**Overview:**\n"
        output += ("The `review` tool scans the PR code changes, and generates a PR review which includes several types of feedbacks, such as possible PR issues, security threats and relevant test in the PR. More feedbacks can be [added](https://pr-agent-docs.codium.ai/tools/review/#general-configurations) by configuring the tool.\n\n"
                   "The tool can be triggered [automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) every time a new PR is opened, or can be invoked manually by commenting on any PR.\n")
        output += """\
- When commenting, to edit [configurations](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L23) related to the review tool (`pr_reviewer` section), use the following template:
```
/review --pr_reviewer.some_config1=... --pr_reviewer.some_config2=...
```
- With a [configuration file](https://pr-agent-docs.codium.ai/usage-guide/configuration_options/), use the following template:
```
[pr_reviewer]
some_config1=...
some_config2=...
```
    """

        output += "\n\nSee the review [usage page](https://pr-agent-docs.codium.ai/tools/review/) for a comprehensive guide on using this tool.\n\n"

        return output

    @staticmethod
    def get_describe_usage_guide():
        """
        Return the markdown/HTML usage guide for the `describe` tool.

        The guide contains an overview followed by a table of collapsible sections
        (automation, custom labels, extra instructions, general commands).
        """
        output = "**Overview:**\n"
        output += "The `describe` tool scans the PR code changes, and generates a description for the PR - title, type, summary, walkthrough and labels. "
        output += "The tool can be triggered [automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) every time a new PR is opened, or can be invoked manually by commenting on a PR.\n"
        output += """\

When commenting, to edit [configurations](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L46) related to the describe tool (`pr_description` section), use the following template:
```
/describe --pr_description.some_config1=... --pr_description.some_config2=...
```
With a [configuration file](https://pr-agent-docs.codium.ai/usage-guide/configuration_options/), use the following template:
```
[pr_description]
some_config1=...
some_config2=...
```
"""
        output += "\n\n<table>"

        # automation
        output += "<tr><td><details> <summary><strong> Enabling\\disabling automation </strong></summary><hr>\n\n"
        output += """\
- When you first install the app, the [default mode](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) for the describe tool is:
```
pr_commands = ["/describe", ...]
```
meaning the `describe` tool will run automatically on every PR.

- Markers are an alternative way to control the generated description, to give maximal control to the user. If you set:
```
pr_commands = ["/describe --pr_description.use_description_markers=true", ...]
```
the tool will replace every marker of the form `pr_agent:marker_name` in the PR description with the relevant content, where `marker_name` is one of the following:
  - `type`: the PR type.
  - `summary`: the PR summary.
  - `walkthrough`: the PR walkthrough.
  - `diagram`: the PR sequence diagram (if enabled).

Note that when markers are enabled, if the original PR description does not contain any markers, the tool will not alter the description at all.

"""
        output += "\n\n</details></td></tr>\n\n"

        # custom labels
        output += "<tr><td><details> <summary><strong> Custom labels </strong></summary><hr>\n\n"
        output += """\
The default labels of the `describe` tool are quite generic: [`Bug fix`, `Tests`, `Enhancement`, `Documentation`, `Other`].

If you specify [custom labels](https://pr-agent-docs.codium.ai/tools/describe/#handle-custom-labels-from-the-repos-labels-page) in the repo's labels page or via configuration file, you can get tailored labels for your use cases.
Examples for custom labels:
- `Main topic:performance` - pr_agent:The main topic of this PR is performance
- `New endpoint` - pr_agent:A new endpoint was added in this PR
- `SQL query` - pr_agent:A new SQL query was added in this PR
- `Dockerfile changes` - pr_agent:The PR contains changes in the Dockerfile
- ...

The list above is eclectic, and aims to give an idea of different possibilities. Define custom labels that are relevant for your repo and use cases.
Note that Labels are not mutually exclusive, so you can add multiple label categories.
Make sure to provide proper title, and a detailed and well-phrased description for each label, so the tool will know when to suggest it.
"""
        output += "\n\n</details></td></tr>\n\n"

        # extra instructions
        output += "<tr><td><details> <summary><strong> Utilizing extra instructions</strong></summary><hr>\n\n"
        # Single-quoted triple string here because the text itself contains triple double quotes.
        output += '''\
The `describe` tool can be configured with extra instructions, to guide the model to a feedback tailored to the needs of your project.

Be specific, clear, and concise in the instructions. With extra instructions, you are the prompter. Notice that the general structure of the description is fixed, and cannot be changed. Extra instructions can change the content or style of each sub-section of the PR description.

Examples for extra instructions:
```
[pr_description]
extra_instructions="""\
- The PR title should be in the format: '<PR type>: <title>'
- The title should be short and concise (up to 10 words)
- ...
"""
```
Use triple quotes to write multi-line instructions. Use bullet points to make the instructions more readable.
'''
        output += "\n\n</details></td></tr>\n\n"


        # general
        output += "\n\n<tr><td><details> <summary><strong> More PR-Agent commands</strong></summary><hr> \n\n"
        output += HelpMessage.get_general_bot_help_text()
        output += "\n\n</details></td></tr>\n\n"

        output += "</table>"

        output += "\n\nSee the [describe usage](https://pr-agent-docs.codium.ai/tools/describe/) page for a comprehensive guide on using this tool.\n\n"

        return output

    @staticmethod
    def get_ask_usage_guide():
        """Return the markdown usage guide for the `ask` tool."""
        output = "**Overview:**\n"
        output += """\
The `ask` tool answers questions about the PR, based on the PR code changes.
It can be invoked manually by commenting on any PR:
```
/ask "..."
```

Note that the tool does not have "memory" of previous questions, and answers each question independently.
You can ask questions about the entire PR, about specific code lines, or about an image related to the PR code changes.
        """
        # output += "\n\n<table>"
        #
        # # # general
        # # output += "\n\n<tr><td><details> <summary><strong> More PR-Agent commands</strong></summary><hr> \n\n"
        # # output += HelpMessage.get_general_bot_help_text()
        # # output += "\n\n</details></td></tr>\n\n"
        #
        # output += "</table>"

        output += "\n\nSee the [ask usage](https://pr-agent-docs.codium.ai/tools/ask/) page for a comprehensive guide on using this tool.\n\n"

        return output

    @staticmethod
    def get_improve_usage_guide():
        """Return the markdown usage guide for the `improve` (code suggestions) tool."""
        output = "**Overview:**\n"
        # The trailing space keeps this sentence separated from the next one in the rendered text.
        output += "The code suggestions tool, named `improve`, scans the PR code changes, and automatically generates code suggestions for improving the PR. "
        output += "The tool can be triggered [automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) every time a new PR is opened, or can be invoked manually by commenting on a PR.\n"
        output += """\
- When commenting, to edit [configurations](https://github.com/Codium-ai/pr-agent/blob/main/pr_agent/settings/configuration.toml#L78) related to the improve tool (`pr_code_suggestions` section), use the following template:

```
/improve --pr_code_suggestions.some_config1=... --pr_code_suggestions.some_config2=...
```

- With a [configuration file](https://pr-agent-docs.codium.ai/usage-guide/configuration_options/), use the following template:

```
[pr_code_suggestions]
some_config1=...
some_config2=...
```

"""

        output += "\n\nSee the improve [usage page](https://pr-agent-docs.codium.ai/tools/improve/) for a comprehensive guide on using this tool.\n\n"

        return output

    @staticmethod
    def get_help_docs_usage_guide():
        """Return the markdown usage guide for the `help_docs` tool."""
        output = "**Overview:**\n"
        output += """\
The help docs tool, named `help_docs`, answers a question based on a given relative path of documentation, either from the repository of this merge request or from a given one.
It can be invoked manually by commenting on any PR:
```
/help_docs "..."
```
"""
        output += "\n\nSee the [help_docs usage](https://pr-agent-docs.codium.ai/tools/help_docs/) page for a comprehensive guide on using this tool.\n\n"
        return output


================================================
FILE: pr_agent/servers/utils.py
================================================
import hashlib
import hmac
import time
from collections import defaultdict
from typing import Any, Callable

from fastapi import HTTPException


def verify_signature(payload_body, secret_token, signature_header):
    """Verify that the payload was sent from GitHub by validating SHA256.

    Raise and return 403 if not authorized.

    Args:
        payload_body: original request body to verify (request.body())
        secret_token: GitHub app webhook token (WEBHOOK_SECRET)
        signature_header: header received from GitHub (x-hub-signature-256)

    Raises:
        HTTPException: with status 403 when the header is missing or the signature does not match.
    """
    if not signature_header:
        raise HTTPException(status_code=403, detail="x-hub-signature-256 header is missing!")
    hash_object = hmac.new(secret_token.encode('utf-8'), msg=payload_body, digestmod=hashlib.sha256)
    expected_signature = "sha256=" + hash_object.hexdigest()
    # Compare as bytes: hmac.compare_digest raises TypeError for str arguments that contain
    # non-ASCII characters, which would turn a forged header into a server error instead of a 403.
    if isinstance(signature_header, str):
        signature_header = signature_header.encode('utf-8', errors='replace')
    if not hmac.compare_digest(expected_signature.encode('utf-8'), signature_header):
        raise HTTPException(status_code=403, detail="Request signatures didn't match!")


class RateLimitExceeded(Exception):
    """Signals that the rate limit of the git provider's API was exceeded."""


class DefaultDictWithTimeout(defaultdict):
    """A defaultdict with a time-to-live (TTL).

    Every key written through ``__setitem__`` (and, optionally, every key read
    through ``__getitem__``) is stamped with the current monotonic time. On
    item access, at most once per ``refresh_interval`` seconds, keys whose
    stamp is older than ``ttl`` seconds are deleted.
    """

    def __init__(
        self,
        default_factory: Callable[[], Any] = None,
        ttl: int = None,
        refresh_interval: int = 60,
        update_key_time_on_get: bool = True,
        *args,
        **kwargs,
    ):
        """
        Args:
            default_factory: The default factory to use for keys that are not in the dictionary.
            ttl: The time-to-live (TTL) in seconds. ``None`` disables expiry.
            refresh_interval: How often to refresh the dict and delete items older than the TTL.
            update_key_time_on_get: Whether to update the access time of a key also on get (or only when set).
        """
        super().__init__(default_factory, *args, **kwargs)
        self.__key_times = dict()
        self.__ttl = ttl
        self.__refresh_interval = refresh_interval
        self.__update_key_time_on_get = update_key_time_on_get
        # Back-date the last refresh so the very first access is allowed to purge.
        self.__last_refresh = self.__time() - self.__refresh_interval

    @staticmethod
    def __time():
        """Return the current monotonic clock reading, in seconds."""
        return time.monotonic()

    def __refresh(self):
        """Delete keys older than the TTL, at most once per refresh interval."""
        if self.__ttl is None:
            return
        request_time = self.__time()
        # Throttle: skip the purge while the previous one is still recent.
        # (The comparison used to be inverted, which skipped the purge once the
        # interval HAD elapsed, so expired keys were never removed.)
        if request_time - self.__last_refresh < self.__refresh_interval:
            return
        to_delete = [key for key, key_time in self.__key_times.items() if request_time - key_time > self.__ttl]
        for key in to_delete:
            if key in self:
                del self[key]
            else:
                # Timestamp without a stored value: just drop the bookkeeping.
                self.__key_times.pop(key, None)
        self.__last_refresh = request_time

    def __getitem__(self, __key):
        """Return the value for the key, optionally refreshing its timestamp first."""
        # Only touch keys that are actually stored. A failed lookup (no default
        # factory) must not leave a timestamp behind, and keys created by the
        # default factory are stamped by __setitem__.
        if self.__update_key_time_on_get and __key in self:
            self.__key_times[__key] = self.__time()
        self.__refresh()
        return super().__getitem__(__key)

    def __setitem__(self, __key, __value):
        """Store the value and stamp the key with the current time."""
        self.__key_times[__key] = self.__time()
        return super().__setitem__(__key, __value)

    def __delitem__(self, __key):
        """Delete the key and its timestamp; raises KeyError if the key is not stored."""
        # Tolerate keys that were never stamped, so the KeyError (if any)
        # comes from the dict itself.
        self.__key_times.pop(__key, None)
        return super().__delitem__(__key)


================================================
FILE: pr_agent/settings/.secrets_template.toml
================================================
# QUICKSTART:
# Copy this file to .secrets.toml in the same folder.
# The minimum workable settings - set openai.key to your API key.
# Set github.deployment_type to "user" and github.user_token to your GitHub personal access token.
# This will allow you to run the CLI scripts in the scripts/ folder and the github_polling server.
#
# See README for details about GitHub App deployment.

[openai]
key = ""  # Acquire through https://platform.openai.com
#org = "<ORGANIZATION>"  # Optional, may be commented out.
# Uncomment the following for Azure OpenAI
#api_type = "azure"
#api_version = '2023-05-15'  # Check Azure documentation for the current API version
#api_base = ""  # The base URL for your Azure OpenAI resource. e.g. "https://<your resource name>.openai.azure.com"
#deployment_id = ""  # The deployment name you chose when you deployed the engine
#fallback_deployments = []  # For each fallback model specified in configuration.toml in the [config] section, specify the appropriate deployment_id

# OpenAI Flex Processing (optional, for cost savings)
# [litellm]
# extra_body='{"processing_mode": "flex"}'
# model_id = "" # Optional: Custom inference profile ID for Amazon Bedrock

[pinecone]
api_key = "..."
environment = "gcp-starter"

[qdrant]
# For Qdrant Cloud or self-hosted Qdrant
url = ""  # e.g., https://xxxxxxxx-xxxxxxxx.eu-central-1-0.aws.cloud.qdrant.io
api_key = ""

[anthropic]
key = "" # Optional, uncomment if you want to use Anthropic. Acquire through https://www.anthropic.com/

[cohere]
key = "" # Optional, uncomment if you want to use Cohere. Acquire through https://dashboard.cohere.ai/

[replicate]
key = "" # Optional, uncomment if you want to use Replicate. Acquire through https://replicate.com/

[groq]
key = "" # Acquire through https://console.groq.com/keys

[xai]
key = "" # Optional, uncomment if you want to use xAI. Acquire through https://console.x.ai/

[huggingface]
key = "" # Optional, uncomment if you want to use Huggingface Inference API. Acquire through https://huggingface.co/docs/api-inference/quicktour
api_base = "" # the base url for your huggingface inference endpoint

[ollama]
api_base = "" # the base url for your local Llama 2, Code Llama, and other models inference endpoint. Acquire through https://ollama.ai/

[vertexai]
vertex_project = "" # the google cloud platform project name for your vertexai deployment
vertex_location = "" # the google cloud platform location for your vertexai deployment

[google_ai_studio]
gemini_api_key = "" # the google AI Studio API key

[github]
# ---- Set the following only for deployment type == "user"
user_token = ""  # A GitHub personal access token with 'repo' scope.
deployment_type = "user" #set to user by default

# ---- Set the following only for deployment type == "app", see README for details.
private_key = """\
-----BEGIN RSA PRIVATE KEY-----
<GITHUB PRIVATE KEY>
-----END RSA PRIVATE KEY-----
"""
app_id = 123456  # The GitHub App ID, replace with your own.
webhook_secret = "<WEBHOOK SECRET>"  # Optional, may be commented out.

[gitlab]
# Gitlab personal access token
personal_access_token = ""
shared_secret = ""  # webhook secret

[gitea]
# Gitea personal access token
personal_access_token=""
webhook_secret="" # webhook secret

[bitbucket]
# For Bitbucket authentication
auth_type = "bearer"  # "bearer" or "basic"
# For bearer token authentication
bearer_token = ""
# For basic authentication (uses token only)
basic_token = ""

[bitbucket_server]
# For Bitbucket Server bearer token
bearer_token = ""
webhook_secret = ""

# For Bitbucket app
app_key = ""
url = ""

[azure_devops]
# For Azure devops personal access token
org = ""
pat = ""

[azure_devops_server]
# For Azure devops Server basic auth - configured in the webhook creation
# Optional, uncomment if you want to use Azure devops webhooks. Value assigned when you create the webhook
# webhook_username = "<basic auth user>"
# webhook_password = "<basic auth password>"

[deepseek]
key = ""

[deepinfra]
key = ""

[azure_ad]
# Azure AD authentication for OpenAI services
client_id = ""  # Your Azure AD application client ID
client_secret = ""  # Your Azure AD application client secret
tenant_id = ""  # Your Azure AD tenant ID
api_base = ""  # Your Azure OpenAI service base URL (e.g., https://openai.xyz.com/)

[openrouter]
key = ""
api_base = ""

[aws]
AWS_ACCESS_KEY_ID = ""
AWS_SECRET_ACCESS_KEY = ""
AWS_REGION_NAME = ""

[aws_secrets_manager]
secret_arn = ""         # The ARN of the AWS Secrets Manager secret containing PR-Agent configuration
region_name = ""        # Optional: specific AWS region (defaults to AWS_REGION_NAME or Lambda region)


================================================
FILE: pr_agent/settings/code_suggestions/pr_code_suggestions_prompts.toml
================================================
[pr_code_suggestions_prompt]
system="""You are PR-Reviewer, an AI specializing in Pull Request (PR) code analysis and suggestions.
{%- if not focus_only_on_problems %}
Your task is to examine the provided code diff, focusing on new code (lines prefixed with '+'), and offer concise, actionable suggestions to fix possible bugs and problems, and enhance code quality and performance.
{%- else %}
Your task is to examine the provided code diff, focusing on new code (lines prefixed with '+'), and offer concise, actionable suggestions to fix critical bugs and problems.
{%- endif %}

The PR code diff will be in the following structured format:
======
## File: 'src/file1.py'
{%- if is_ai_metadata %}
### AI-generated changes summary:
* ...
* ...
{%- endif %}

@@ ... @@ def func1():
__new hunk__
 unchanged code line0
 unchanged code line1
+new code line2 added
 unchanged code line3
__old hunk__
 unchanged code line0
 unchanged code line1
-old code line2 removed
 unchanged code line3

@@ ... @@ def func2():
__new hunk__
 unchanged code line4
+new code line5 added
 unchanged code line6

## File: 'src/file2.py'
...
======

Important notes about the structured diff format above:
1. Each PR code chunk is decoupled into separate '__new hunk__' and '__old hunk__' sections:
  - The '__new hunk__' section shows the code chunk AFTER the PR changes.
  - The '__old hunk__' section shows the code chunk BEFORE the PR changes. If no code was removed from the chunk, the '__old hunk__' section will be omitted.
2. The diff uses line prefixes to show changes:
  '+' → new line code added (will appear only in '__new hunk__')
  '-' → line code removed (will appear only in '__old hunk__')
  ' ' → unchanged context lines (will appear in both sections)
{%- if is_ai_metadata %}
3. When available, an AI-generated summary will precede each file's diff, with a high-level overview of the changes. Note that this summary may not be fully accurate or complete.
{%- endif %}


Specific guidelines for generating code suggestions:
{%- if not focus_only_on_problems %}
- Provide up to {{ num_code_suggestions }} distinct and insightful code suggestions.
{%- else %}
- Provide up to {{ num_code_suggestions }} distinct and insightful code suggestions. Return fewer suggestions if no pertinent ones are applicable.
{%- endif %}
- DO NOT suggest implementing changes that are already present in the '+' lines compared to the '-' lines.
- Focus your suggestions ONLY on new code introduced in the PR ('+' lines in '__new hunk__' sections).
{%- if not focus_only_on_problems %}
- Prioritize suggestions that address potential issues, critical problems, and bugs in the PR code. Avoid repeating changes already implemented in the PR. If no pertinent suggestions are applicable, return an empty list.
- Don't suggest to add docstring, type hints, or comments, to remove unused imports, or to use more specific exception types.
{%- else %}
- Only give suggestions that address critical problems and bugs in the PR code. If no relevant suggestions are applicable, return an empty list.
- DO NOT suggest the following:
    - change packages version
    - add missing import statement
    - declare undefined variable, or remove unused variable
    - use more specific exception types
    - repeat changes already done in the PR code
{%- endif %}
- Be aware that your input consists only of partial code segments (PR diff code), not the complete codebase. Therefore, avoid making suggestions that might duplicate existing functionality, and refrain from questioning code elements (such as variable declarations or import statements) that may be defined elsewhere in the codebase.
- When mentioning code elements (variables, names, or files) in your response, surround them with backticks (`). For example: "verify that `user_id` is..."

{%- if extra_instructions %}


Extra user-provided instructions (should be addressed with high priority):
======
{{ extra_instructions }}
======
{%- endif %}


The output must be a YAML object equivalent to type $PRCodeSuggestions, according to the following Pydantic definitions:
=====
class CodeSuggestion(BaseModel):
    relevant_file: str = Field(description="Full path of the relevant file")
    language: str = Field(description="Programming language used by the relevant file")
    existing_code: str = Field(description="A short code snippet, from a '__new hunk__' section after the PR changes, that the suggestion aims to enhance or fix. Include only complete code lines. Use ellipsis (...) for brevity if needed. This snippet should represent the specific PR code targeted for improvement.")
    suggestion_content: str = Field(description="An actionable suggestion to enhance, improve or fix the new code introduced in the PR. Don't present here actual code snippets, just the suggestion. Be short and concise")
    improved_code: str = Field(description="A refined code snippet that replaces the 'existing_code' snippet after implementing the suggestion.")
    one_sentence_summary: str = Field(description="A concise, single-sentence overview (up to 6 words) of the suggested improvement. Focus on the 'what'. Be general, and avoid method or variable names.")
{%- if not focus_only_on_problems %}
    label: str = Field(description="A single, descriptive label that best characterizes the suggestion type. Possible labels include 'security', 'possible bug', 'possible issue', 'performance', 'enhancement', 'best practice', 'maintainability', 'typo'. Other relevant labels are also acceptable.")
{%- else %}
    label: str = Field(description="A single, descriptive label that best characterizes the suggestion type. Possible labels include 'security', 'critical bug', 'general'. The 'general' section should be used for suggestions that address a major issue, but are not necessarily on a critical level.")
{%- endif %}


class PRCodeSuggestions(BaseModel):
    code_suggestions: List[CodeSuggestion]
=====


Example output:
```yaml
code_suggestions:
- relevant_file: |
    src/file1.py
  language: |
    python
  existing_code: |
    ...
  suggestion_content: |
    ...
  improved_code: |
    ...
  one_sentence_summary: |
    ...
  label: |
    ...
```

Each YAML output MUST be after a newline, indented, with block scalar indicator ('|').
"""

user="""--PR Info--

Title: '{{title}}'

{%- if date %}

Today's Date: {{date}}
{%- endif %}

The PR Diff:
======
{{ diff_no_line_numbers|trim }}
======

{%- if duplicate_prompt_examples %}


Example output:
```yaml
code_suggestions:
- relevant_file: |
    src/file1.py
  language: |
    python
  existing_code: |
    ...
  suggestion_content: |
    ...
  improved_code: |
    ...
  one_sentence_summary: |
    ...
  label: |
    ...
```
(replace '...' with actual content)
{%- endif %}


Response (should be a valid YAML, and nothing else):
```yaml
"""


================================================
FILE: pr_agent/settings/code_suggestions/pr_code_suggestions_prompts_not_decoupled.toml
================================================
[pr_code_suggestions_prompt_not_decoupled]
system="""You are PR-Reviewer, an AI specializing in Pull Request (PR) code analysis and suggestions.
{%- if not focus_only_on_problems %}
Your task is to examine the provided code diff, focusing on new code (lines prefixed with '+'), and offer concise, actionable suggestions to fix possible bugs and problems, and enhance code quality and performance.
{%- else %}
Your task is to examine the provided code diff, focusing on new code (lines prefixed with '+'), and offer concise, actionable suggestions to fix critical bugs and problems.
{%- endif %}


The PR code diff will be in the following structured format:
======
## File: 'src/file1.py'
{%- if is_ai_metadata %}
### AI-generated changes summary:
* ...
* ...
{%- endif %}

@@ ... @@ def func1():
 unchanged code line0
 unchanged code line1
+new code line2
-removed code line2
 unchanged code line3

@@ ... @@ def func2():
...


## File: 'src/file2.py'
...
======
The diff structure above uses line prefixes to show changes:
'+' → new line code added
'-' → line code removed
' ' → unchanged context lines
{%- if is_ai_metadata %}

When available, an AI-generated summary will precede each file's diff, with a high-level overview of the changes. Note that this summary may not be fully accurate or complete.
{%- endif %}


Specific guidelines for generating code suggestions:
{%- if not focus_only_on_problems %}
- Provide up to {{ num_code_suggestions }} distinct and insightful code suggestions.
{%- else %}
- Provide up to {{ num_code_suggestions }} distinct and insightful code suggestions. Return fewer suggestions if no pertinent ones are applicable.
{%- endif %}
- Focus your suggestions ONLY on improving the new code introduced in the PR (lines starting with '+' in the diff). The lines in the diff starting with '-' are only for reference and should not be considered for suggestions.
{%- if not focus_only_on_problems %}
- Prioritize suggestions that address potential issues, critical problems, and bugs in the PR code. Avoid repeating changes already implemented in the PR. If no pertinent suggestions are applicable, return an empty list.
- Don't suggest to add docstring, type hints, or comments, to remove unused imports, or to use more specific exception types.
{%- else %}
- Only give suggestions that address critical problems and bugs in the PR code. If no relevant suggestions are applicable, return an empty list.
- DO NOT suggest the following:
    - change packages version
    - add missing import statement
    - declare undefined variable, add missing imports, etc.
    - use more specific exception types
{%- endif %}
- When mentioning code elements (variables, names, or files) in your response, surround them with markdown backticks (`). For example: "verify that `user_id` is..."
- Note that you will only see partial code segments that were changed (diff hunks in a PR code), and not the entire codebase. Avoid suggestions that might duplicate existing functionality of the outer codebase. In addition, the absence of a definition, declaration, import, or initialization for any entity in the PR code is NEVER a basis for a suggestion.
- Also note that if the code ends at an opening brace or statement that begins a new scope (like 'if', 'for', 'try'), don't treat it as incomplete. Instead, acknowledge the visible scope boundary and analyze only the code shown.

{%- if extra_instructions %}


Extra user-provided instructions (should be addressed with high priority):
======
{{ extra_instructions }}
======
{%- endif %}


The output must be a YAML object equivalent to type $PRCodeSuggestions, according to the following Pydantic definitions:
=====
class CodeSuggestion(BaseModel):
    relevant_file: str = Field(description="Full path of the relevant file")
    language: str = Field(description="Programming language used by the relevant file")
    existing_code: str = Field(description="A short code snippet, from the final state of the PR diff, that the suggestion will address. Select only the specific span of code that will be modified - without surrounding unchanged code. Preserve all indentation, newlines, and original formatting. Show the code snippet without the '+'/'-'/' ' prefixes. When providing suggestions for long code sections, shorten the presented code with ellipsis (...) for brevity where possible.")
    suggestion_content: str = Field(description="An actionable suggestion to enhance, improve or fix the new code introduced in the PR. Use 2-3 short sentences.")
    improved_code: str = Field(description="A refined code snippet that replaces the 'existing_code' snippet after implementing the suggestion.")
    one_sentence_summary: str = Field(description="A single-sentence overview (up to 6 words) of the suggestion. Focus on the 'what'. Be general, and avoid mentioning method or variable names.")
{%- if not focus_only_on_problems %}
    label: str = Field(description="A single, descriptive label that best characterizes the suggestion type. Possible labels include 'security', 'possible bug', 'possible issue', 'performance', 'enhancement', 'best practice', 'maintainability', 'typo'. Other relevant labels are also acceptable.")
{%- else %}
    label: str = Field(description="A single, descriptive label that best characterizes the suggestion type. Possible labels include 'security', 'critical bug', 'general'. The 'general' section should be used for suggestions that address a major issue, but are not necessarily on a critical level.")
{%- endif %}


class PRCodeSuggestions(BaseModel):
    code_suggestions: List[CodeSuggestion]
=====


Example output:
```yaml
code_suggestions:
- relevant_file: |
    src/file1.py
  language: |
    python
  existing_code: |
    ...
  suggestion_content: |
    ...
  improved_code: |
    ...
  one_sentence_summary: |
    ...
  label: |
    ...
```

Each YAML output MUST be after a newline, indented, with block scalar indicator ('|').
"""

user="""--PR Info--

Title: '{{title}}'

{%- if date %}

Today's Date: {{date}}
{%- endif %}

The PR Diff:
======
{{ diff_no_line_numbers|trim }}
======

{%- if duplicate_prompt_examples %}


Example output:
```yaml
code_suggestions:
- relevant_file: |
    src/file1.py
  language: |
    python
  existing_code: |
    ...
  suggestion_content: |
    ...
  improved_code: |
    ...
  one_sentence_summary: |
    ...
  label: |
    ...
```
(replace '...' with actual content)
{%- endif %}


Response (should be a valid YAML, and nothing else):
```yaml
"""


================================================
FILE: pr_agent/settings/code_suggestions/pr_code_suggestions_reflect_prompts.toml
================================================
[pr_code_suggestions_reflect_prompt]
system="""You are an AI language model specialized in reviewing and evaluating code suggestions for a Pull Request (PR).
Your task is to analyze a PR code diff and evaluate the correctness and importance of a set of AI-generated code suggestions.
In addition to evaluating the suggestion correctness and importance, another sub-task you have is to detect the line numbers in the '__new hunk__' of the PR code diff section that correspond to the 'existing_code' snippet.

Examine each suggestion meticulously, assessing its quality, relevance, and accuracy within the context of PR. Keep in mind that the suggestions may vary in their correctness, accuracy and impact.
Consider the following components of each suggestion:
    1. 'one_sentence_summary' - A one-liner summary of the suggestion's purpose
    2. 'suggestion_content' - The suggestion content, explaining the proposed modification
    3. 'existing_code' - a code snippet from a __new hunk__ section in the PR code diff that the suggestion addresses
    4. 'improved_code' - a code snippet demonstrating how the 'existing_code' should be after the suggestion is applied

Be particularly vigilant for suggestions that:
    - Overlook crucial details in the PR code
    - The 'improved_code' section does not accurately reflect the suggested changes, in relation to the 'existing_code'
    - Contradict or ignore parts of the PR's modifications
In such cases, assign the suggestion a score of 0.

Evaluate each valid suggestion by scoring its potential impact on the PR's correctness, quality and functionality.
Key guidelines for evaluation:
- Thoroughly examine both the suggestion content and the corresponding PR code diff. Be vigilant for potential errors in each suggestion, ensuring they are logically sound, accurate, and directly derived from the PR code diff.
- Extend your review beyond the specifically mentioned code lines to encompass surrounding PR code context, verifying the suggestions' contextual accuracy.
- Validate the 'existing_code' field by confirming it matches or is accurately derived from code lines within a '__new hunk__' section of the PR code diff.
- Ensure the 'improved_code' section accurately reflects the 'existing_code' segment after the suggested modification is applied.
- Apply a nuanced scoring system:
  - Reserve high scores (8-10) for suggestions addressing critical issues such as major bugs or security concerns.
  - Assign moderate scores (3-7) to suggestions that tackle minor issues, improve code style, enhance readability, or boost maintainability.
  - Avoid inflating scores for suggestions that, while correct, offer only marginal improvements or optimizations.
- Maintain the original order of suggestions in your feedback, corresponding to their input sequence.

Additional scoring considerations:
- If the suggestion only asks the user to verify or ensure a change done in the PR, it should not receive a score above 7 (and may be lower).
- Error handling or type checking suggestions should not receive a score above 8 (and may be lower).
- If the 'existing_code' snippet is equal to the 'improved_code' snippet, it should not receive a score above 7 (and may be lower).
- Assume each suggestion is independent and is not influenced by the other suggestions.
- Assign a score of 0 to suggestions aiming at:
   - Adding docstrings, type hints, or comments
   - Removing unused imports or variables
   - Adding missing import statements
   - Using more specific exception types
   - Questioning the definition, declaration, import, or initialization of any entity in the PR code that might be done in the outer codebase


The PR code diff will be presented in the following structured format:
======
## File: 'src/file1.py'
{%- if is_ai_metadata %}
### AI-generated changes summary:
* ...
* ...
{%- endif %}

@@ ... @@ def func1():
__new hunk__
11  unchanged code line0
12  unchanged code line1
13 +new code line2 added
14  unchanged code line3
__old hunk__
 unchanged code line0
 unchanged code line1
-old code line2 removed
 unchanged code line3

@@ ... @@ def func2():
__new hunk__
...
__old hunk__
...


## File: 'src/file2.py'
...
======
- In the format above, the diff is organized into separate '__new hunk__' and '__old hunk__' sections for each code chunk. '__new hunk__' contains the updated code, while '__old hunk__' shows the removed code. If no code was added or removed in a specific chunk, the corresponding section will be omitted.
- Line numbers are included for the '__new hunk__' sections to enable referencing specific lines in the code suggestions. These numbers are for reference only and are not part of the actual code.
- Code lines are prefixed with symbols: '+' for new code added in the PR, '-' for code removed, and ' ' for unchanged code.
{%- if is_ai_metadata %}
- When available, an AI-generated summary will precede each file's diff, with a high-level overview of the changes. Note that this summary may not be fully accurate or comprehensive.
{%- endif %}


The output must be a YAML object equivalent to type $PRCodeSuggestionsFeedback, according to the following Pydantic definitions:
=====
class CodeSuggestionFeedback(BaseModel):
    suggestion_summary: str = Field(description="Repeated from the input")
    relevant_file: str = Field(description="Repeated from the input")
    relevant_lines_start: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion starts (inclusive). Should be derived from the added '__new hunk__' line numbers, and correspond to the first line of the relevant 'existing code' snippet.")
    relevant_lines_end: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion ends (inclusive). Should be derived from the added '__new hunk__' line numbers, and correspond to the end of the relevant 'existing code' snippet")
    suggestion_score: int = Field(description="Evaluate the suggestion and assign a score from 0 to 10. Give 0 if the suggestion is wrong. For valid suggestions, score from 1 (lowest impact/importance) to 10 (highest impact/importance).")
    why: str = Field(description="Briefly explain the score given in 1-2 short sentences, focusing on the suggestion's impact, relevance, and accuracy. When mentioning code elements (variables, names, or files) in your response, surround them with markdown backticks (`).")

class PRCodeSuggestionsFeedback(BaseModel):
    code_suggestions: List[CodeSuggestionFeedback]
=====


Example output:
```yaml
code_suggestions:
- suggestion_summary: |
    Use a more descriptive variable name here
  relevant_file: "src/file1.py"
  relevant_lines_start: 13
  relevant_lines_end: 14
  suggestion_score: 6
  why: |
    The variable name 't' is not descriptive enough
- ...
```


Each YAML output MUST be after a newline, indented, with block scalar indicator ('|').
"""

user="""You are given a Pull Request (PR) code diff:
======
{{ diff|trim }}
======


Below are {{ num_code_suggestions }} AI-generated code suggestions for the Pull Request:
======
{{ suggestion_str|trim }}
======


{%- if duplicate_prompt_examples %}


Example output:
```yaml
code_suggestions:
- suggestion_summary: |
    ...
  relevant_file: "..."
  relevant_lines_start: ...
  relevant_lines_end: ...
  suggestion_score: ...
  why: |
    ...
- ...
```
(replace '...' with actual content)
{%- endif %}

Response (should be a valid YAML, and nothing else):
```yaml
"""


================================================
FILE: pr_agent/settings/configuration.toml
================================================
# Important: This file contains all available configuration options.
# Do not copy this entire file to your repository configuration.
# Your repository configuration should only include options you wish to override from the defaults.

[config]
# models
model="gpt-5.4-2026-03-05"
fallback_models=["o4-mini"]
#model_reasoning="o4-mini" # dedicated reasoning model for self-reflection
#model_weak="gpt-4o" # optional, a weaker model to use for some easier tasks
# CLI
git_provider="github"
publish_output=true
publish_output_progress=true
verbosity_level=0 # 0,1,2
use_extra_bad_extensions=false
# Log
log_level="DEBUG"
# Configurations
use_wiki_settings_file=true
use_repo_settings_file=true
use_global_settings_file=true
disable_auto_feedback = false
ai_timeout=120 # 2 minutes
skip_keys = []
custom_reasoning_model = false # when true, disables system messages and temperature controls for models that don't support chat-style inputs
response_language="en-US" # Locale code for PR responses: ISO 639 language code plus ISO 3166 country code (e.g., "en-US", "it-IT", "zh-CN", ...)
# token limits
max_description_tokens = 500
max_commits_tokens = 500
max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities.
custom_model_max_tokens=-1 # for models not in the default list
model_token_count_estimate_factor=0.3 # factor to increase the token count estimate, in order to reduce likelihood of model failure due to too many tokens - applicable only when requesting an accurate estimate.
# patch extension logic
patch_extension_skip_types =[".md",".txt"]
allow_dynamic_context=true
max_extra_lines_before_dynamic_context = 10 # will try to include up to 10 extra lines before the hunk in the patch, until we reach an enclosing function or class
patch_extra_lines_before = 5 # Number of extra lines (+3 default ones) to include before each hunk in the patch
patch_extra_lines_after = 1 # Number of extra lines (+3 default ones) to include after each hunk in the patch
secret_provider="" # "" (disabled), "google_cloud_storage", or "aws_secrets_manager" for secure secret management
cli_mode=false
output_relevant_configurations=false
large_patch_policy = "clip" # "clip", "skip"
duplicate_prompt_examples = false
# seed
seed=-1 # set positive value to fix the seed (and ensure temperature=0)
temperature=0.2
# ignore logic
ignore_pr_title = ["^\\[Auto\\]", "^Auto"] # a list of regular expressions to match against the PR title to ignore the PR agent
ignore_pr_target_branches = [] # a list of regular expressions of target branches to ignore from PR agent when a PR is created
ignore_pr_source_branches = [] # a list of regular expressions of source branches to ignore from PR agent when a PR is created
ignore_pr_labels = [] # labels to ignore from PR agent when a PR is created
ignore_pr_authors = [] # authors to ignore from PR agent when a PR is created
ignore_repositories = [] # a list of regular expressions of repository full names (e.g. "org/repo") to ignore from PR agent processing
ignore_language_framework = [] # a list of code-generation languages or frameworks (e.g. 'protobuf', 'go_gen') whose auto-generated source files will be excluded from analysis
#
is_auto_command = false # will be auto-set to true if the command is triggered by an automation
enable_ai_metadata = false # will enable adding ai metadata
reasoning_effort = "medium" # "low", "medium", "high"
# extended thinking for Claude reasoning models
enable_claude_extended_thinking = false # Set to true to enable extended thinking feature
extended_thinking_budget_tokens = 2048
extended_thinking_max_output_tokens = 4096
# Extract issue number from PR source branch name (e.g. feature/1-auth-google -> issue #1). When true, branch-derived
# issue URLs are merged with tickets from the PR description for compliance. Set to false to restore description-only behaviour.
# Note: Branch-name extraction is GitHub-only for now; other providers planned for later.
extract_issue_from_branch = true
# Optional: custom regex with exactly one capturing group for the issue number (validated at runtime; falls back
# to default if missing). If empty, uses default pattern: first 1-6 digits at start of branch or after a slash,
# followed by hyphen or end (e.g. feature/1-test, 123-fix). GitHub only; other providers planned for later.
branch_issue_regex = ""


[pr_reviewer] # /review #
# enable/disable features
require_score_review=false
require_tests_review=true
require_estimate_effort_to_review=true
require_can_be_split_review=false
require_security_review=true
require_estimate_contribution_time_cost=false
require_todo_scan=false
require_ticket_analysis_review=true
# general options
publish_output_no_suggestions=true # Set to "false" if you only need the reviewer's remarks (not labels, not "security audit", etc.) and want to avoid noisy "No major issues detected" comments.
persistent_comment=true
extra_instructions = ""
num_max_findings = 3
final_update_message = true
# review labels
enable_review_labels_security=true
enable_review_labels_effort=true
# specific configurations for incremental review (/review -i)
require_all_thresholds_for_incremental_review=false
minimal_commits_for_incremental_review=0
minimal_minutes_for_incremental_review=0
enable_intro_text=true
enable_help_text=false # Determines whether to include help text in the PR review. Disabled by default.

[pr_description] # /describe #
publish_labels=false
add_original_user_description=true
generate_ai_title=false
use_bullet_points=true
extra_instructions = ""
enable_pr_type=true
final_update_message = true
enable_help_text=false
enable_help_comment=false
enable_pr_diagram=true # adds a section with a diagram of the PR changes
# describe as comment
publish_description_as_comment=false
publish_description_as_comment_persistent=true
## changes walkthrough section
enable_semantic_files_types=true
collapsible_file_list='adaptive' # true, false, 'adaptive'
collapsible_file_list_threshold=6
inline_file_summary=false # false, true, 'table'
# markers
use_description_markers=false
enable_large_pr_handling=true
include_generated_by_header=true
#custom_labels = ['Bug fix', 'Tests', 'Bug fix with tests', 'Enhancement', 'Documentation', 'Other']
max_ai_calls=4
async_ai_calls=true
[pr_questions] # /ask #
enable_help_text=false
use_conversation_history=true


[pr_code_suggestions] # /improve #
commitable_code_suggestions = false
dual_publishing_score_threshold=-1 # -1 to disable, [0-10] to set the threshold (>=) for publishing a code suggestion both in a table and as commitable
focus_only_on_problems=true
#
extra_instructions = ""
enable_help_text=false
enable_chat_text=false
persistent_comment=true
max_history_len=4
publish_output_no_suggestions=true
# suggestions scoring
suggestions_score_threshold=0 # [0-10]. We recommend not setting this value above 8, since a higher threshold may filter out highly relevant suggestions
new_score_mechanism=true
new_score_mechanism_th_high=9
new_score_mechanism_th_medium=7
# params for '/improve --extended' mode
auto_extended_mode=true
num_code_suggestions_per_chunk=3
max_number_of_calls = 3
parallel_calls = true

final_clip_factor = 0.8
decouple_hunks = false
# self-review checkbox
demand_code_suggestions_self_review=false # add a checkbox for the author to self-review the code suggestions
code_suggestions_self_review_text= "**Author self-review**: I have reviewed the PR code suggestions, and addressed the relevant ones."
approve_pr_on_self_review=false # if true, the PR will be auto-approved after the author clicks on the self-review checkbox
fold_suggestions_on_self_review=true # if true, the code suggestions will be folded after the author clicks on the self-review checkbox

[pr_custom_prompt] # /custom_prompt #
prompt = """\
The code suggestions should focus only on the following:
- ...
- ...
...
"""
suggestions_score_threshold=0
num_code_suggestions_per_chunk=3
self_reflect_on_custom_suggestions=true
enable_help_text=false


[pr_add_docs] # /add_docs #
extra_instructions = ""
docs_style = "Sphinx" # "Google Style with Args, Returns, Attributes...etc", "Numpy Style", "Sphinx Style", "PEP257", "reStructuredText"
file = ""              # in case there are several components with the same name, you can specify the relevant file
class_name = ""        # in case there are several methods with the same name in the same file, you can specify the relevant class name

[pr_update_changelog] # /update_changelog #
push_changelog_changes=false
extra_instructions = ""
add_pr_link=true
skip_ci_on_push=true

[pr_analyze] # /analyze #
enable_help_text=true

[pr_test] # /test #
extra_instructions = ""
testing_framework = "" # specify the testing framework you want to use
num_tests=3            # number of tests to generate. max 5.
avoid_mocks=true       # if true, the generated tests will prefer to use real objects instead of mocks
file = ""              # in case there are several components with the same name, you can specify the relevant file
class_name = ""        # in case there are several methods with the same name in the same file, you can specify the relevant class name
enable_help_text=false

[pr_improve_component] # /improve_component #
num_code_suggestions=4
extra_instructions = ""
file = ""              # in case there are several components with the same name, you can specify the relevant file
class_name = ""        # in case there are several methods with the same name in the same file, you can specify the relevant class name

[pr_help] # /help #
force_local_db=false
num_retrieved_snippets=5

[pr_config] # /config #

[pr_help_docs]
repo_url = "" # If not overridden, the repo that the context came from (issue or PR) is used
repo_default_branch = "main"
docs_path = "docs"
exclude_root_readme = false
supported_doc_exts = [".md", ".mdx", ".rst"]
enable_help_text=false

[github]
# The type of deployment to create. Valid values are 'app' or 'user'.
deployment_type = "user"
ratelimit_retries = 5
base_url = "https://api.github.com"
publish_inline_comments_fallback_with_verification = true
try_fix_invalid_inline_comments = true
app_name = "pr-agent"
ignore_bot_pr = true

[github_action_config]
# auto_review = true    # set as env var in .github/workflows/pr-agent.yaml
# auto_describe = true  # set as env var in .github/workflows/pr-agent.yaml
# auto_improve = true   # set as env var in .github/workflows/pr-agent.yaml
# pr_actions = ['opened', 'reopened', 'ready_for_review', 'review_requested']

[github_app]
# these toggles allow running the github app from custom deployments
bot_user = "github-actions[bot]"
override_deployment_type = true
# settings for "pull_request" event
handle_pr_actions = ['opened', 'reopened', 'ready_for_review']
pr_commands = [
    "/describe --pr_description.final_update_message=false",
    "/review",
    "/improve",
]
# settings for "pull_request" event with "synchronize" action - used to detect and handle push triggers for new commits
handle_push_trigger = false
push_trigger_ignore_bot_commits = true
push_trigger_ignore_merge_commits = true
push_trigger_wait_for_initial_review = true
push_trigger_pending_tasks_backlog = true
push_trigger_pending_tasks_ttl = 300
push_commands = [
    "/describe",
    "/review",
]

[gitlab]
url = "https://gitlab.com"
expand_submodule_diffs = false
pr_commands = [
    "/describe --pr_description.final_update_message=false",
    "/review",
    "/improve",
]
handle_push_trigger = false
push_commands = [
    "/describe",
    "/review",
]
# Configure SSL validation for GitLab. Can be either set to the path of a custom CA or disabled entirely.
# ssl_verify = true

[gitea]
url = "https://gitea.com"
handle_push_trigger = false
pr_commands = [
    "/describe",
    "/review",
    "/improve",
]
push_commands = [
    "/describe",
    "/review",
]

[bitbucket_app]
pr_commands = [
    "/describe --pr_description.final_update_message=false",
    "/review",
    "/improve --pr_code_suggestions.commitable_code_suggestions=true",
]
avoid_full_files = false

[local]
# LocalGitProvider settings - uncomment to use paths other than default
# description_path= "path/to/description.md"
# review_path= "path/to/review.md"

[gerrit]
# endpoint to the gerrit service
# url = "ssh://gerrit.example.com:29418"
# user for gerrit authentication
# user = "ai-reviewer"
# patch server where patches will be saved
# patch_server_endpoint = "http://127.0.0.1:5000/patch"
# token to authenticate in the patch server
# patch_server_token = ""

[bitbucket_server]
# URL to the BitBucket Server instance
# url = "https://git.bitbucket.com"
url = ""
pr_commands = [
    "/describe --pr_description.final_update_message=false",
    "/review",
    "/improve --pr_code_suggestions.commitable_code_suggestions=true",
]

[litellm]
# use_client = false
# drop_params = false
enable_callbacks = false
success_callback = []
failure_callback = []
service_callback = []
# model_id = "" # Optional: Custom inference profile ID for Amazon Bedrock

[pr_similar_issue]
skip_comments = false
force_update_dataset = false
max_issues_to_scan = 500
vectordb = "pinecone" # options: "pinecone", "lancedb", "qdrant"

[pr_find_similar_component]
class_name = ""
file = ""
search_from_org = false
allow_fallback_less_words = true
number_of_keywords = 5
number_of_results = 5

[pinecone]
# fill and place in .secrets.toml
#api_key = ...
# environment = "gcp-starter"

[lancedb]
uri = "./lancedb"

[qdrant]
# fill and place credentials in .secrets.toml
# url = "https://YOUR-QDRANT-URL"
# api_key = "..."

[best_practices]
content = ""
organization_name = ""
max_lines_allowed = 800
enable_global_best_practices = false

[auto_best_practices]
enable_auto_best_practices = true # public - general flag to disable all auto best practices usage
utilize_auto_best_practices = true # public - disable usage of auto best practices in the 'improve' tool
extra_instructions = "" # public - extra instructions to the auto best practices generation prompt
content = ""
max_patterns = 5 # max number of patterns to be detected

[azure_devops]
default_comment_status = "closed"

[azure_devops_server]
pr_commands = [
    "/describe",
    "/review",
    "/improve",
]


================================================
FILE: pr_agent/settings/custom_labels.toml
================================================
[config]
enable_custom_labels=false

## template for custom labels
#[custom_labels."Bug fix"]
#description = """Fixes a bug in the code"""
#[custom_labels."Tests"]
#description = """Adds or modifies tests"""
#[custom_labels."Bug fix with tests"]
#description = """Fixes a bug in the code and adds or modifies tests"""
#[custom_labels."Enhancement"]
#description = """Adds new features or modifies existing ones"""
#[custom_labels."Documentation"]
#description = """Adds or modifies documentation"""
#[custom_labels."Other"]
#description = """Other changes that do not fit in any of the above categories"""


================================================
FILE: pr_agent/settings/generated_code_ignore.toml
================================================
[generated_code]

# Protocol Buffers
protobuf = [
  "**/*.pb.go",
  "**/*.pb.cc",
  "**/*_pb2.py",
  "**/*.pb.swift",
  "**/*.pb.rb",
  "**/*.pb.php",
  "**/*.pb.h"
]

# OpenAPI / Swagger stubs
openapi = [
  "**/__generated__/**",
  "**/openapi_client/**",
  "**/openapi_server/**"
]
swagger = [
  "**/swagger.json",
  "**/swagger.yaml"
]

# GraphQL codegen
graphql = [
  "**/*.graphql.ts",
  "**/*.generated.ts",
  "**/*.graphql.js"
]

# RPC / gRPC Generators 
grpc_python      = ["**/*_grpc.py"]
grpc_java        = ["**/*Grpc.java"]
grpc_csharp      = ["**/*Grpc.cs"]
grpc_typescript  = ["**/*_grpc.ts", "**/*_grpc.js"]

# Go code generators
go_gen = [
  "**/*_gen.go",
  "**/*generated.go"
]


================================================
FILE: pr_agent/settings/ignore.toml
================================================
[ignore]

glob = [
    # Ignore files and directories matching these glob patterns.
    # See https://docs.python.org/3/library/glob.html
    'vendor/**',
]
regex = [
    # Ignore files and directories matching these regex patterns.
    # See https://learnbyexample.github.io/python-regex-cheatsheet/
    # for example: regex = ['.*\.toml$']
]


================================================
FILE: pr_agent/settings/language_extensions.toml
================================================
[bad_extensions]
default = [
    'app',
    'bin',
    'bmp',
    'bz2',
    'class',
    'csv',
    'dat',
    'db',
    'dll',
    'dylib',
    'egg',
    'eot',
    'exe',
    'gif',
    'gitignore',
    'glif',
    'gradle',
    'gz',
    'ico',
    'jar',
    'jpeg',
    'jpg',
    'lo',
    'lock',
    'log',
    'mp3',
    'mp4',
    'nar',
    'o',
    'ogg',
    'otf',
    'p',
    'pdf',
    'png',
    'pickle',
    'pkl',
    'pyc',
    'pyd',
    'pyo',
    'rkt',
    'so',
    'ss',
    'svg',
    'tar',
    'tgz',
    'tsv',
    'ttf',
    'war',
    'webm',
    'woff',
    'woff2',
    'xz',
    'zip',
    'zst',
    'snap',
    'lockb'
]
extra = [
    'md',
    'txt'
]

[language_extension_map_org]
"1C Enterprise" = [".bsl", ]
ABAP = [".abap", ]
"AGS Script" = [".ash", ]
AMPL = [".ampl", ]
ANTLR = [".g4", ]
"API Blueprint" = [".apib", ]
APL = [".apl", ".dyalog", ]
ASP = [".asp", ".asax", ".ascx", ".ashx", ".asmx", ".aspx", ".axd", ]
ATS = [".dats", ".hats", ".sats", ]
ActionScript = [".as", ]
Ada = [".adb", ".ada", ".ads", ]
Agda = [".agda", ]
Alloy = [".als", ]
ApacheConf = [".apacheconf", ".vhost", ]
AppleScript = [".applescript", ".scpt", ]
Arc = [".arc", ]
Arduino = [".ino", ]
AsciiDoc = [".asciidoc", ".adoc", ]
AspectJ = [".aj", ]
Assembly = [".asm", ".a51", ".nasm", ]
Augeas = [".aug", ]
AutoHotkey = [".ahk", ".ahkl", ]
AutoIt = [".au3", ]
Awk = [".awk", ".auk", ".gawk", ".mawk", ".nawk", ]
Batchfile = [".bat", ".cmd", ]
Befunge = [".befunge", ]
Bison = [".bison", ]
BitBake = [".bb", ]
BlitzBasic = [".decls", ]
BlitzMax = [".bmx", ]
Bluespec = [".bsv", ]
Boo = [".boo", ]
Brainfuck = [".bf", ]
Brightscript = [".brs", ]
Bro = [".bro", ]
C = [".c", ".cats", ".h", ".idc", ".w", ]
"C#" = [".cs", ".cake", ".cshtml", ".csx", ]
"C++" = [".cpp", ".c++", ".cc", ".cp", ".cxx", ".h++", ".hh", ".hpp", ".hxx", ".inl", ".ipp", ".tcc", ".tpp", ".C", ".H", ]
C-ObjDump = [".c-objdump", ]
"C2hs Haskell" = [".chs", ]
CLIPS = [".clp", ]
CMake = [".cmake", ".cmake.in", ]
COBOL = [".cob", ".cbl", ".ccp", ".cobol", ".cpy", ]
CSS = [".css", ]
CSV = [".csv", ]
"Cap'n Proto" = [".capnp", ]
CartoCSS = [".mss", ]
Ceylon = [".ceylon", ]
Chapel = [".chpl", ]
ChucK = [".ck", ]
Cirru = [".cirru", ]
Clarion = [".clw", ]
Clean = [".icl", ".dcl", ]
Click = [".click", ]
Clojure = [".clj", ".boot", ".cl2", ".cljc", ".cljs", ".cljs.hl", ".cljscm", ".cljx", ".hic", ]
CoffeeScript = [".coffee", "._coffee", ".cjsx", ".cson", ".iced", ]
ColdFusion = [".cfm", ".cfml", ]
"ColdFusion CFC" = [".cfc", ]
"Common Lisp" = [".lisp", ".asd", ".lsp", ".ny", ".podsl", ".sexp", ]
"Component Pascal" = [".cps", ]
Coq = [".coq", ]
Cpp-ObjDump = [".cppobjdump", ".c++-objdump", ".c++objdump", ".cpp-objdump", ".cxx-objdump", ]
Creole = [".creole", ]
Crystal = [".cr", ]
Csound = [".csd", ]
Cucumber = [".feature", ]
Cuda = [".cu", ".cuh", ]
Cycript = [".cy", ]
Cython = [".pyx", ".pxd", ".pxi", ]
D = [".di", ]
D-ObjDump = [".d-objdump", ]
"DIGITAL Command Language" = [".com", ]
DM = [".dm", ]
"DNS Zone" = [".zone", ".arpa", ]
"Darcs Patch" = [".darcspatch", ".dpatch", ]
Dart = [".dart", ]
Diff = [".diff", ".patch", ]
Dockerfile = [".dockerfile", "Dockerfile", ]
Dogescript = [".djs", ]
Dylan = [".dylan", ".dyl", ".intr", ".lid", ]
E = [".E", ]
ECL = [".ecl", ".eclxml", ]
Eagle = [".sch", ".brd", ]
"Ecere Projects" = [".epj", ]
Eiffel = [".e", ]
Elixir = [".ex", ".exs", ]
Elm = [".elm", ]
"Emacs Lisp" = [".el", ".emacs", ".emacs.desktop", ]
EmberScript = [".em", ".emberscript", ]
Erlang = [".erl", ".escript", ".hrl", ".xrl", ".yrl", ]
"F#" = [".fs", ".fsi", ".fsx", ]
FLUX = [".flux", ]
FORTRAN = [".f90", ".f", ".f03", ".f08", ".f77", ".f95", ".for", ".fpp", ]
Factor = [".factor", ]
Fancy = [".fy", ".fancypack", ]
Fantom = [".fan", ]
Formatted = [".eam.fs", ]
Forth = [".fth", ".4th", ".forth", ".frt", ]
FreeMarker = [".ftl", ]
G-code = [".g", ".gco", ".gcode", ]
GAMS = [".gms", ]
GAP = [".gap", ".gi", ]
GAS = [".s", ]
GDScript = [".gd", ]
GLSL = [".glsl", ".fp", ".frag", ".frg", ".fsh", ".fshader", ".geo", ".geom", ".glslv", ".gshader", ".shader", ".vert", ".vrx", ".vsh", ".vshader", ]
Genshi = [".kid", ]
"Gentoo Ebuild" = [".ebuild", ]
"Gentoo Eclass" = [".eclass", ]
"Gettext Catalog" = [".po", ".pot", ]
Glyph = [".glf", ]
Gnuplot = [".gp", ".gnu", ".gnuplot", ".plot", ".plt", ]
Go = [".go", ]
Golo = [".golo", ]
Gosu = [".gst", ".gsx", ".vark", ]
Grace = [".grace", ]
Gradle = [".gradle", ]
"Grammatical Framework" = [".gf", ]
GraphQL = [".graphql", ]
"Graphviz (DOT)" = [".dot", ".gv", ]
Groff = [".man", ".1", ".1in", ".1m", ".1x", ".2", ".3", ".3in", ".3m", ".3qt", ".3x", ".4", ".5", ".6", ".7", ".8", ".9", ".me", ".rno", ".roff", ]
Groovy = [".groovy", ".grt", ".gtpl", ".gvy", ]
"Groovy Server Pages" = [".gsp", ]
HCL = [".hcl", ".tf", ]
HLSL = [".hlsl", ".fxh", ".hlsli", ]
HTML = [".html", ".htm", ".html.hl", ".xht", ".xhtml", ]
"HTML+Django" = [".mustache", ".jinja", ]
"HTML+EEX" = [".eex", ]
"HTML+ERB" = [".erb", ".erb.deface", ]
"HTML+PHP" = [".phtml", ]
HTTP = [".http", ]
Haml = [".haml", ".haml.deface", ]
Handlebars = [".handlebars", ".hbs", ]
Harbour = [".hb", ]
Haskell = [".hs", ".hsc", ]
Haxe = [".hx", ".hxsl", ]
Hy = [".hy", ]
IDL = [".dlm", ]
"IGOR Pro" = [".ipf", ]
INI = [".ini", ".cfg", ".prefs", ".properties", ]
"IRC log" = [".irclog", ".weechatlog", ]
Idris = [".idr", ".lidr", ]
"Inform 7" = [".ni", ".i7x", ]
"Inno Setup" = [".iss", ]
Io = [".io", ]
Ioke = [".ik", ]
Isabelle = [".thy", ]
J = [".ijs", ]
JFlex = [".flex", ".jflex", ]
JSON = [".json", ".geojson", ".lock", ".topojson", ]
JSON5 = [".json5", ]
JSONLD = [".jsonld", ]
JSONiq = [".jq", ]
JSX = [".jsx", ]
Jade = [".jade", ]
Jasmin = [".j", ]
Java = [".java", ]
"Java Server Pages" = [".jsp", ]
JavaScript = [".js", "._js", ".bones", ".es6", ".jake", ".jsb", ".jscad", ".jsfl", ".jsm", ".jss", ".njs", ".pac", ".sjs", ".ssjs", ".xsjs", ".xsjslib", ]
Julia = [".jl", ]
"Jupyter Notebook" = [".ipynb", ]
KRL = [".krl", ]
KiCad = [".kicad_pcb", ]
Kit = [".kit", ]
Kotlin = [".kt", ".ktm", ".kts", ]
LFE = [".lfe", ]
LLVM = [".ll", ]
LOLCODE = [".lol", ]
LSL = [".lsl", ".lslp", ]
LabVIEW = [".lvproj", ]
Lasso = [".lasso", ".las", ".lasso8", ".lasso9", ".ldml", ]
Latte = [".latte", ]
Lean = [".lean", ".hlean", ]
Less = [".less", ]
Lex = [".lex", ]
LilyPond = [".ly", ".ily", ]
"Linker Script" = [".ld", ".lds", ]
Liquid = [".liquid", ]
"Literate Agda" = [".lagda", ]
"Literate CoffeeScript" = [".litcoffee", ]
"Literate Haskell" = [".lhs", ]
LiveScript = [".ls", "._ls", ]
Logos = [".xm", ".x", ".xi", ]
Logtalk = [".lgt", ".logtalk", ]
LookML = [".lookml", ]
Lua = [".lua", ".nse", ".pd_lua", ".rbxs", ".wlua", ]
M = [".mumps", ]
M4 = [".m4", ]
MAXScript = [".mcr", ]
MTML = [".mtml", ]
MUF = [".muf", ]
Makefile = [".mak", ".mk", ".mkfile", "Makefile", ]
Mako = [".mako", ".mao", ]
Maple = [".mpl", ]
Markdown = [".md", ".markdown", ".mkd", ".mkdn", ".mkdown", ".ron", ]
Mask = [".mask", ]
Mathematica = [".mathematica", ".cdf", ".ma", ".mt", ".nb", ".nbp", ".wl", ".wlt", ]
Matlab = [".matlab", ]
Max = [".maxpat", ".maxhelp", ".maxproj", ".mxt", ".pat", ]
MediaWiki = [".mediawiki", ".wiki", ]
Metal = [".metal", ]
MiniD = [".minid", ]
Mirah = [".druby", ".duby", ".mir", ".mirah", ]
Modelica = [".mo", ]
"Module Management System" = [".mms", ".mmk", ]
Monkey = [".monkey", ]
MoonScript = [".moon", ]
Myghty = [".myt", ]
NSIS = [".nsi", ".nsh", ]
NetLinx = [".axs", ".axi", ]
"NetLinx+ERB" = [".axs.erb", ".axi.erb", ]
NetLogo = [".nlogo", ]
Nginx = [".nginxconf", ]
Nimrod = [".nim", ".nimrod", ]
Ninja = [".ninja", ]
Nit = [".nit", ]
Nix = [".nix", ]
Nu = [".nu", ]
NumPy = [".numpy", ".numpyw", ".numsc", ]
OCaml = [".ml", ".eliom", ".eliomi", ".ml4", ".mli", ".mll", ".mly", ]
ObjDump = [".objdump", ]
"Objective-C++" = [".mm", ]
Objective-J = [".sj", ]
Octave = [".oct", ]
Omgrofl = [".omgrofl", ]
Opa = [".opa", ]
Opal = [".opal", ]
OpenCL = [".cl", ".opencl", ]
"OpenEdge ABL" = [".p", ]
OpenSCAD = [".scad", ]
Org = [".org", ]
Ox = [".ox", ".oxh", ".oxo", ]
Oxygene = [".oxygene", ]
Oz = [".oz", ]
PAWN = [".pwn", ]
PHP = [".php", ".aw", ".ctp", ".php3", ".php4", ".php5", ".phps", ".phpt", ]
"POV-Ray SDL" = [".pov", ]
Pan = [".pan", ]
Papyrus = [".psc", ]
Parrot = [".parrot", ]
"Parrot Assembly" = [".pasm", ]
"Parrot Internal Representation" = [".pir", ]
Pascal = [".pas", ".dfm", ".dpr", ".lpr", ]
Perl = [".pl", ".al", ".perl", ".ph", ".plx", ".pm", ".psgi", ".t", ]
Perl6 = [".6pl", ".6pm", ".nqp", ".p6", ".p6l", ".p6m", ".pl6", ".pm6", ]
Pickle = [".pkl", ]
PigLatin = [".pig", ]
Pike = [".pike", ".pmod", ]
Pod = [".pod", ]
PogoScript = [".pogo", ]
Pony = [".pony", ]
PostScript = [".ps", ".eps", ]
PowerShell = [".ps1", ".psd1", ".psm1", ]
Processing = [".pde", ]
Prolog = [".prolog", ".yap", ]
"Propeller Spin" = [".spin", ]
"Protocol Buffer" = [".proto", ]
"Public Key" = [".pub", ]
"Pure Data" = [".pd", ]
PureBasic = [".pb", ".pbi", ]
PureScript = [".purs", ]
Python = [".py", ".bzl", ".gyp", ".lmi", ".pyde", ".pyp", ".pyt", ".pyw", ".tac", ".wsgi", ".xpy", ]
"Python traceback" = [".pytb", ]
QML = [".qml", ".qbs", ]
QMake = [".pri", ]
R = [".r", ".rd", ".rsx", ]
RAML = [".raml", ]
RDoc = [".rdoc", ]
REALbasic = [".rbbas", ".rbfrm", ".rbmnu", ".rbres", ".rbtbar", ".rbuistate", ]
RHTML = [".rhtml", ]
RMarkdown = [".rmd", ]
Racket = [".rkt", ".rktd", ".rktl", ".scrbl", ]
"Ragel in Ruby Host" = [".rl", ]
"Raw token data" = [".raw", ]
Rebol = [".reb", ".r2", ".r3", ".rebol", ]
Red = [".red", ".reds", ]
Redcode = [".cw", ]
"Ren'Py" = [".rpy", ]
RenderScript = [".rsh", ]
RobotFramework = [".robot", ]
Rouge = [".rg", ]
Ruby = [".rb", ".builder", ".gemspec", ".god", ".irbrc", ".jbuilder", ".mspec", ".podspec", ".rabl", ".rake", ".rbuild", ".rbw", ".rbx", ".ru", ".ruby", ".thor", ".watchr", ]
Rust = [".rs", ".rs.in", ]
SAS = [".sas", ]
SCSS = [".scss", ]
SMT = [".smt2", ".smt", ]
SPARQL = [".sparql", ".rq", ]
SQF = [".sqf", ".hqf", ]
SQL = [".pls", ".pck", ".pkb", ".pks", ".plb", ".plsql", ".sql", ".cql", ".ddl", ".prc", ".tab", ".udf", ".viw", ".db2", ]
STON = [".ston", ]
SVG = [".svg", ]
Sage = [".sage", ".sagews", ]
SaltStack = [".sls", ]
Sass = [".sass", ]
Scala = [".scala", ".sbt", ]
Scaml = [".scaml", ]
Scheme = [".scm", ".sld", ".sps", ".ss", ]
Scilab = [".sci", ".sce", ]
Self = [".self", ]
Shell = [".sh", ".bash", ".bats", ".command", ".ksh", ".sh.in", ".tmux", ".tool", ".zsh", ]
ShellSession = [".sh-session", ]
Shen = [".shen", ]
Slash = [".sl", ]
Slim = [".slim", ]
Smali = [".smali", ]
Smalltalk = [".st", ]
Smarty = [".tpl", ]
Solidity = [".sol", ]
SourcePawn = [".sp", ".sma", ]
Squirrel = [".nut", ]
Stan = [".stan", ]
"Standard ML" = [".ML", ".fun", ".sig", ".sml", ]
Stata = [".do", ".ado", ".doh", ".ihlp", ".mata", ".matah", ".sthlp", ]
Stylus = [".styl", ]
SuperCollider = [".scd", ]
Swift = [".swift", ]
SystemVerilog = [".sv", ".svh", ".vh", ]
TOML = [".toml", ]
TXL = [".txl", ]
Tcl = [".tcl", ".adp", ".tm", ]
Tcsh = [".tcsh", ".csh", ]
TeX = [".tex", ".aux", ".bbx", ".bib", ".cbx", ".dtx", ".ins", ".lbx", ".ltx", ".mkii", ".mkiv", ".mkvi", ".sty", ".toc", ]
Tea = [".tea", ]
Text = [".txt", ".no", ]
Textile = [".textile", ]
Thrift = [".thrift", ]
Turing = [".tu", ]
Turtle = [".ttl", ]
Twig = [".twig", ]
TypeScript = [".ts", ".tsx", ]
"Unified Parallel C" = [".upc", ]
"Unity3D Asset" = [".anim", ".asset", ".mat", ".meta", ".prefab", ".unity", ]
Uno = [".uno", ]
UnrealScript = [".uc", ]
UrWeb = [".ur", ".urs", ]
VCL = [".vcl", ]
VHDL = [".vhdl", ".vhd", ".vhf", ".vhi", ".vho", ".vhs", ".vht", ".vhw", ]
Vala = [".vala", ".vapi", ]
Verilog = [".veo", ]
VimL = [".vim", ]
"Visual Basic" = [".vb", ".bas", ".frm", ".frx", ".vba", ".vbhtml", ".vbs", ]
Volt = [".volt", ]
Vue = [".vue", ]
"Web Ontology Language" = [".owl", ]
WebAssembly = [".wat", ]
WebIDL = [".webidl", ]
X10 = [".x10", ]
XC = [".xc", ]
XML = [".xml", ".ant", ".axml", ".ccxml", ".clixml", ".cproject", ".csl", ".csproj", ".ct", ".dita", ".ditamap", ".ditaval", ".dll.config", ".dotsettings", ".filters", ".fsproj", ".fxml", ".glade", ".grxml", ".iml", ".ivy", ".jelly", ".jsproj", ".kml", ".launch", ".mdpolicy", ".mxml", ".nproj", ".nuspec", ".odd", ".osm", ".plist", ".props", ".ps1xml", ".psc1", ".pt", ".rdf", ".rss", ".scxml", ".srdf", ".storyboard", ".stTheme", ".sublime-snippet", ".targets", ".tmCommand", ".tml", ".tmLanguage", ".tmPreferences", ".tmSnippet", ".tmTheme", ".ui", ".urdf", ".ux", ".vbproj", ".vcxproj", ".vssettings", ".vxml", ".wsdl", ".wsf", ".wxi", ".wxl", ".wxs", ".x3d", ".xacro", ".xaml", ".xib", ".xlf", ".xliff", ".xmi", ".xml.dist", ".xproj", ".xsd", ".xul", ".zcml", ]
XPages = [".xsp-config", ".xsp.metadata", ]
XProc = [".xpl", ".xproc", ]
XQuery = [".xquery", ".xq", ".xql", ".xqm", ".xqy", ]
XS = [".xs", ]
XSLT = [".xslt", ".xsl", ]
Xojo = [".xojo_code", ".xojo_menu", ".xojo_report", ".xojo_script", ".xojo_toolbar", ".xojo_window", ]
Xtend = [".xtend", ]
YAML = [".yml", ".reek", ".rviz", ".sublime-syntax", ".syntax", ".yaml", ".yaml-tmlanguage", ]
YANG = [".yang", ]
Yacc = [".y", ".yacc", ".yy", ]
Zephir = [".zep", ]
Zig = [".zig", ]
Zimpl = [".zimpl", ".zmpl", ".zpl", ]
desktop = [".desktop", ".desktop.in", ]
eC = [".ec", ".eh", ]
edn = [".edn", ]
fish = [".fish", ]
mupad = [".mu", ]
nesC = [".nc", ]
ooc = [".ooc", ]
reStructuredText = [".rst", ".rest", ".rest.txt", ".rst.txt", ]
wisp = [".wisp", ]
xBase = [".prg", ".prw", ]

[docs_blacklist_extensions]
# Disable docs generation for these extensions: text files and scripts that do not define functions, classes, or methods
docs_blacklist = ['sql', 'txt', 'yaml', 'json', 'xml', 'md', 'rst', 'rest', 'rest.txt', 'rst.txt', 'mdpolicy', 'mdown', 'markdown', 'mdwn', 'mkd', 'mkdn', 'mkdown', 'sh']


================================================
FILE: pr_agent/settings/pr_add_docs.toml
================================================
[pr_add_docs_prompt]
system="""You are PR-Doc, a language model that specializes in generating documentation for code components in a Pull Request (PR).
Your task is to generate {{ docs_for_language }} for code components in the PR Diff.


Example for the PR Diff format:
======
## File: 'src/file1.py'

@@ -12,3 +12,4 @@ def func1():
__new hunk__
12  code line1 that remained unchanged in the PR
14 +new code line1 added in the PR
15 +new code line2 added in the PR
16  code line2 that remained unchanged in the PR
__old hunk__
 code line1 that remained unchanged in the PR
-code line that was removed in the PR
 code line2 that remained unchanged in the PR

@@ ... @@ def func2():
__new hunk__
...
__old hunk__
...


## File: 'src/file2.py'
...
======


Specific instructions:
- Try to identify edited/added code components (classes/functions/methods...) that are undocumented, and generate {{ docs_for_language }} for each one.
- If there are documented (any type of {{ language }} documentation) code components in the PR, Don't generate {{ docs_for_language }} for them.
- Ignore code components that don't appear fully in the '__new hunk__' section. For example, you must see the component header and body.
- Make sure the {{ docs_for_language }} starts and ends with standard {{ language }} {{ docs_for_language }} signs.
- The {{ docs_for_language }} should be in standard format.
- Provide the exact line number (inclusive) where the {{ docs_for_language }} should be added.


{%- if extra_instructions %}

Extra instructions from the user:
======
{{ extra_instructions }}
======
{%- endif %}


You must use the following YAML schema to format your answer:
```yaml
Code Documentation:
  type: array
  uniqueItems: true
  items:
    relevant file:
      type: string
      description: The full file path of the relevant file.
    relevant line:
      type: integer
      description: |-
        The relevant line number from a '__new hunk__' section where the {{ docs_for_language }} should be added.
    doc placement:
      type: string
      enum:
        - before
        - after
      description: |-
        The {{ docs_for_language }} placement relative to the relevant line (code component).
        For example, in Python the docs are placed after the function signature, but in Java they are placed before.
    documentation:
      type: string
      description: |-
        The {{ docs_for_language }} content. It should be complete, correctly formatted and indented, and without line numbers.
```

Example output:
```yaml
Code Documentation:
-   relevant file: |-
        src/file1.py
    relevant line: 12
    doc placement: after
    documentation: |-
        \"\"\"
        This is a python docstring for func1.
        \"\"\"
- ...
...
```


Each YAML output MUST be after a newline, indented, with block scalar indicator ('|-').
Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields.
"""

user="""PR Info:

Title: '{{ title }}'

Branch: '{{ branch }}'

{%- if description %}

Description:
======
{{ description|trim }}
======
{%- endif %}

{%- if language %}

Main PR language: '{{language}}'
{%- endif %}


The PR Diff:
======
{{ diff|trim }}
======


Response (should be a valid YAML, and nothing else):
```yaml
"""


================================================
FILE: pr_agent/settings/pr_custom_labels.toml
================================================
[pr_custom_labels_prompt]
system="""You are PR-Reviewer, a language model designed to review a Git Pull Request (PR).
Your task is to provide labels that describe the PR content.
{%- if enable_custom_labels %}
Thoroughly read the labels name and the provided description, and decide whether the label is relevant to the PR.
{%- endif %}

{%- if extra_instructions %}

Extra instructions from the user:
======
{{ extra_instructions }}
======
{% endif %}


The output must be a YAML object equivalent to type $Labels, according to the following Pydantic definitions:
======
{%- if enable_custom_labels %}

{{ custom_labels_class }}

{%- else %}
class Label(str, Enum):
    bug_fix = "Bug fix"
    tests = "Tests"
    enhancement = "Enhancement"
    documentation = "Documentation"
    other = "Other"
{%- endif %}

class Labels(BaseModel):
    labels: List[Label] =  Field(min_items=0, description="choose the relevant custom labels that describe the PR content, and return their keys. Use the value field of the Label object to better understand the label meaning.")
======


Example output:

```yaml
labels:
- ...
- ...
```

Answer should be a valid YAML, and nothing else.
"""

user="""PR Info:

Previous title: '{{title}}'

Branch: '{{ branch }}'

{%- if description %}

Description:
======
{{ description|trim }}
======
{%- endif %}

{%- if language %}

Main PR language: '{{ language }}'
{%- endif %}
{%- if commit_messages_str %}


Commit messages:
======
{{ commit_messages_str|trim }}
======
{%- endif %}


The PR Git Diff:
======
{{ diff|trim }}
======

Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines.


Response (should be a valid YAML, and nothing else):
```yaml
"""


================================================
FILE: pr_agent/settings/pr_description_prompts.toml
================================================
[pr_description_prompt]
system="""You are PR-Reviewer, a language model designed to review a Git Pull Request (PR).
Your task is to provide a full description for the PR content: type, description, title, and files walkthrough.
- Focus on the new PR code (lines starting with '+' in the 'PR Git Diff' section).
- Keep in mind that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or out of date. Hence, compare them to the PR diff code, and use them only as a reference.
- The generated title and description should prioritize the most significant changes.
- If needed, each YAML output should use the block scalar indicator ('|')
- When quoting variables, names or file paths from the code, use backticks (`) instead of single quote (').
- When needed, use '- ' as bullets

{%- if extra_instructions %}

Extra instructions from the user:
=====
{{extra_instructions}}
=====
{% endif %}


The output must be a YAML object equivalent to type $PRDescription, according to the following Pydantic definitions:
=====
class PRType(str, Enum):
    bug_fix = "Bug fix"
    tests = "Tests"
    enhancement = "Enhancement"
    documentation = "Documentation"
    other = "Other"

{%- if enable_custom_labels %}

{{ custom_labels_class }}

{%- endif %}

{%- if enable_semantic_files_types %}

class FileDescription(BaseModel):
    filename: str = Field(description="The full file path of the relevant file")
{%- if include_file_summary_changes %}
    changes_summary: str = Field(description="concise summary of the changes in the relevant file, in bullet points (1-4 bullet points).")
{%- endif %}
    changes_title: str = Field(description="one-line summary (5-10 words) capturing the main theme of changes in the file")
    label: str = Field(description="a single semantic label that represents a type of code changes that occurred in the File. Possible values (partial list): 'bug fix', 'tests', 'enhancement', 'documentation', 'error handling', 'configuration changes', 'dependencies', 'formatting', 'miscellaneous', ...")
{%- endif %}

class PRDescription(BaseModel):
    type: List[PRType] = Field(description="one or more types that describe the PR content. Return the label member value (e.g. 'Bug fix', not 'bug_fix')")
    description: str = Field(description="summarize the PR changes with 1-4 bullet points, each up to 8 words. For large PRs, add sub-bullets for each bullet if needed. Order bullets by importance, with each bullet highlighting a key change group.")
    title: str = Field(description="a concise and descriptive title that captures the PR's main theme")
{%- if enable_pr_diagram %}
    changes_diagram: str = Field(description='a horizontal diagram that represents the main PR changes, in the format of a valid mermaid LR flowchart. The diagram should be concise and easy to read. Leave empty if no diagram is relevant. To create robust Mermaid diagrams, follow this two-step process: (1) Declare the nodes: nodeID["node description"]. (2) Then define the links: nodeID1 -- "link text" --> nodeID2. Node description must always be surrounded with double quotation marks')
{%- endif %}
{%- if enable_semantic_files_types %}
    pr_files: List[FileDescription] = Field(max_items=20, description="a list of all the files that were changed in the PR, and summary of their changes. Each file must be analyzed regardless of change size.")
{%- endif %}
=====


Example output:

```yaml
type:
- ...
- ...
description: |
  - ...
  - ...
title: |
  ...
{%- if enable_pr_diagram %}
changes_diagram: |
  ```mermaid
  flowchart LR
    ...
  ```
{%- endif %}
{%- if enable_semantic_files_types %}
pr_files:
- filename: |
    ...
{%- if include_file_summary_changes %}
  changes_summary: |
    ...
{%- endif %}
  changes_title: |
    ...
  label: |
    label_key_1
...
{%- endif %}
```

Answer should be a valid YAML, and nothing else. Each YAML output MUST be after a newline, with proper indent, and block scalar indicator ('|')
"""

user="""
{%- if related_tickets %}
Related Ticket Info:
{% for ticket in related_tickets %}
=====
Ticket Title: '{{ ticket.title }}'
{%- if ticket.labels %}
Ticket Labels: {{ ticket.labels }}
{%- endif %}
{%- if ticket.body %}
Ticket Description:
#####
{{ ticket.body }}
#####
{%- endif %}
=====
{% endfor %}
{%- endif %}

PR Info:

Previous title: '{{title}}'

{%- if description %}

Previous description:
=====
{{ description|trim }}
=====
{%- endif %}

Branch: '{{branch}}'

{%- if commit_messages_str %}

Commit messages:
=====
{{ commit_messages_str|trim }}
=====
{%- endif %}


The PR Git Diff:
=====
{{ diff|trim }}
=====

Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines.

{%- if duplicate_prompt_examples %}


Example output:
```yaml
type:
- Bug fix
- Refactoring
- ...
description: |
  - ...
  - ...
title: |
  ...
{%- if enable_pr_diagram %}
changes_diagram: |
  ```mermaid
  flowchart LR
    ...
  ```
{%- endif %}
{%- if enable_semantic_files_types %}
pr_files:
- filename: |
    ...
{%- if include_file_summary_changes %}
  changes_summary: |
    ...
{%- endif %}
  changes_title: |
    ...
  label: |
    label_key_1
...
{%- endif %}
```
(replace '...' with the actual values)
{%- endif %}


Response (should be a valid YAML, and nothing else):
```yaml
"""


================================================
FILE: pr_agent/settings/pr_evaluate_prompt_response.toml
================================================
[pr_evaluate_prompt]
prompt="""\
You are the PR-task-evaluator, a language model that compares and ranks the quality of two responses provided in response to a lengthy task regarding a Pull Request (PR) code diff.


The task to be evaluated is:

***** Start of Task *****
{{pr_task|trim}}

***** End of Task *****


Response 1 to the task is:

***** Start of Response 1 *****

{{pr_response1|trim}}

***** End of Response 1 *****


Response 2 to the task is:

***** Start of Response 2 *****

{{pr_response2|trim}}

***** End of Response 2 *****


Guidelines to evaluate the responses:
- Thoroughly read the 'Task' part. It contains details about the task, followed by the PR code diff to which the task is related.
- Thoroughly read the 'Response 1' and 'Response 2' parts. They are the two independent responses, generated by two different models, for the task.

After that, rank each response. Criteria to rank each response:
- How well does the response follow the specific task instructions and requirements?
- How well does the response analyze and understand the PR code diff?
- How well will a person perceive it as a good response that correctly addresses the task?
- How well does the response prioritize key feedback, related to the task instructions, that a human reader seeing that feedback would also consider as important?
- Don't necessarily rank higher a response that is longer. A shorter response might be better if it is more concise, and still addresses the task better.


The output must be a YAML object equivalent to type $PRRankRespones, according to the following Pydantic definitions:
=====
class PRRankRespones(BaseModel):
    which_response_was_better: Literal[0, 1, 2] = Field(description="A number indicating which response was better. 0 means both responses are equally good.")
    why: str = Field(description="In a short and concise manner, explain why the chosen response is better than the other. Be specific and give examples if relevant.")
    score_response1: int = Field(description="A score between 1 and 10, indicating the quality of the response1, based on the criteria mentioned in the prompt.")
    score_response2: int = Field(description="A score between 1 and 10, indicating the quality of the response2, based on the criteria mentioned in the prompt.")
=====


Example output:
```yaml
which_response_was_better: "X"
why: "Response X is better because it is more practical, and addresses the task requirements better since ..."
score_response1: ...
score_response2: ...
```


Response (should be a valid YAML, and nothing else):
```yaml
"""


================================================
FILE: pr_agent/settings/pr_help_docs_headings_prompts.toml
================================================

[pr_help_docs_headings_prompts]
system="""You are Doc-helper, a language model that ranks documentation files based on their relevance to user questions.
You will receive a question, a repository url and file names along with optional groups of headings extracted from such files from that repository (either as markdown or as restructured text).
Your task is to rank file paths based on how likely they contain the answer to a user's question, using only the headings from each such file and the file name.

======
==file name==

'src/file1.py'

==index==

0 based integer

==file headings==
heading #1
heading #2
...

==file name==

'src/file2.py'

==index==

0 based integer

==file headings==
heading #1
heading #2
...

...
======

Additional instructions:
- Consider only the file names and section headings within each document
- Present the most relevant files first, based strictly on how well their headings and file names align with user question

The output must be a YAML object equivalent to type $DocHeadingsHelper, according to the following Pydantic definitions:
=====
class file_idx_and_path(BaseModel):
    idx: int = Field(description="The zero based index of file_name, as it appeared in the original list of headings. Cannot be negative.")
    file_name: str = Field(description="The file_name exactly as it appeared in the question")

class DocHeadingsHelper(BaseModel):
    user_question: str = Field(description="The user's question")
    relevant_files_ranking: List[file_idx_and_path] = Field(description="Files sorted in descending order by relevance to question")
=====


Example output:
```yaml
user_question: |
  ...
relevant_files_ranking:
- idx: 101
  file_name: "src/file1.py"
- ...
```
"""

user="""\
Documentation url: '{{ docs_url|trim }}'
-----


User's Question:
=====
{{ question|trim }}
=====


Filenames with optional headings from documentation website content:
=====
{{ snippets|trim }}
=====


Reminder: The output must be a YAML object equivalent to type $DocHeadingsHelper, similar to the following example output:
=====


Example output:
```yaml
user_question: |
  ...
relevant_files_ranking:
- idx: 101
  file_name: "src/file1.py"
- ...
```
=====

Important Notes:
1. Output most relevant file names first, by descending order of relevancy.
2. Only include files with non-negative indices


Response (should be a valid YAML, and nothing else).
```yaml
"""


================================================
FILE: pr_agent/settings/pr_help_docs_prompts.toml
================================================
[pr_help_docs_prompts]
system="""You are Doc-helper, a language model designed to answer questions about a documentation website for a given repository.
You will receive a question, a repository url and the full documentation content for that repository (either as markdown or as restructured text).
Your goal is to provide the best answer to the question using the documentation provided.

Additional instructions:
- Be short and concise in your answers. Give examples if needed.
- Answer only questions that are related to the documentation website content. If the question is completely unrelated to the documentation, return an empty response.


The output must be a YAML object equivalent to type $DocHelper, according to the following Pydantic definitions:
=====
class relevant_section(BaseModel):
    file_name: str = Field(description="The name of the relevant file")
    relevant_section_header_string: str = Field(description="The exact text of the relevant markdown/restructured text section heading from the relevant file  (starting with '#', '##', etc.). Return empty string if the entire file is the relevant section, or if the relevant section has no heading")

class DocHelper(BaseModel):
    user_question: str = Field(description="The user's question")
    response: str = Field(description="The response to the user's question")
    relevant_sections: List[relevant_section] = Field(description="A list of the relevant markdown/restructured text sections in the documentation that answer the user's question, ordered by importance (most relevant first)")
    question_is_relevant: int = Field(description="Return 1 if the question is somewhat relevant to documentation. 0 - otherwise")
=====


Example output:
```yaml
user_question: |
  ...
response: |
  ...
relevant_sections:
- file_name: "src/file1.py"
  relevant_section_header_string: |
    ...
- ...
question_is_relevant: |
  1
```
"""

user="""\
Documentation url: '{{ docs_url| trim }}'
-----


User's Question:
=====
{{ question|trim }}
=====


Documentation website content:
=====
{{ snippets|trim }}
=====


Reminder: The output must be a YAML object equivalent to type $DocHelper, similar to the following example output:
=====
Example output:
```yaml
user_question: |
  ...
response: |
  ...
relevant_sections:
- file_name: "src/file1.py"
  relevant_section_header_string: |
    ...
- ...
question_is_relevant: |
  1
```
=====


Response (should be a valid YAML, and nothing else).
```yaml
"""


================================================
FILE: pr_agent/settings/pr_help_prompts.toml
================================================
[pr_help_prompts]
system="""You are Doc-helper, a language model designed to answer questions about a documentation website for an open-source project called "PR-Agent" (recently renamed to "Qodo Merge").
You will receive a question, and the full documentation website content.
Your goal is to provide the best answer to the question using the documentation provided.

Additional instructions:
- Try to be short and concise in your answers. Try to give examples if needed.
- The main tools of PR-Agent are 'describe', 'review', 'improve'. If there is ambiguity to which tool the user is referring to, prioritize snippets of these tools over others.
- If the question has ambiguity and can relate to different tools or platforms, provide the best answer possible based on what is available, but also state in your answer what additional information would be needed to give a more accurate answer.


The output must be a YAML object equivalent to type $DocHelper, according to the following Pydantic definitions:
=====
class relevant_section(BaseModel):
    file_name: str = Field(description="The name of the relevant file")
    relevant_section_header_string: str = Field(description="The exact text of the relevant markdown section heading from the relevant file  (starting with '#', '##', etc.). Return empty string if the entire file is the relevant section, or if the relevant section has no heading")

class DocHelper(BaseModel):
    user_question: str = Field(description="The user's question")
    response: str = Field(description="The response to the user's question")
    relevant_sections: List[relevant_section] = Field(description="A list of the relevant markdown sections in the documentation that answer the user's question, ordered by importance (most relevant first)")
=====


Example output:
```yaml
user_question: |
  ...
response: |
  ...
relevant_sections:
- file_name: "src/file1.py"
  relevant_section_header_string: |
    ...
- ...
```
"""

user="""\
User's Question:
=====
{{ question|trim }}
=====


Documentation website content:
=====
{{ snippets|trim }}
=====


Response (should be a valid YAML, and nothing else):
```yaml
"""


================================================
FILE: pr_agent/settings/pr_information_from_user_prompts.toml
================================================
[pr_information_from_user_prompt]
system="""You are PR-Reviewer, a language model designed to review a Git Pull Request (PR).
Given the PR Info and the PR Git Diff, generate 3 short questions about the PR code for the PR author.
The goal of the questions is to help the language model understand the PR better, so the questions should be insightful, informative, non-trivial, and relevant to the PR.
You should prefer asking yes/no questions, or multiple choice questions. Also add at least one open-ended question, but make sure it is not too difficult, and can be answered in a sentence or two.


Example output:
'
Questions to better understand the PR:
1) ...
2) ...
...
'
"""

user="""PR Info:
Title: '{{title}}'

Branch: '{{branch}}'

{%- if description %}

Description:
======
{{ description|trim }}
======
{%- endif %}

{%- if language %}

Main PR language: '{{ language }}'
{%- endif %}
{%- if commit_messages_str %}


Commit messages:
======
{{ commit_messages_str|trim }}
======
{%- endif %}


The PR Git Diff:
======
{{ diff|trim }}
======

Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines


Response:
"""


================================================
FILE: pr_agent/settings/pr_line_questions_prompts.toml
================================================
[pr_line_questions_prompt]
system="""You are PR-Reviewer, a language model designed to answer questions about a Git Pull Request (PR).

Your goal is to answer questions\\tasks about specific lines of code in the PR, and provide feedback.
Be informative, constructive, and give examples. Try to be as specific as possible.
Don't avoid answering the questions. You must answer the questions, as best as you can, without adding any unrelated content.

Additional guidelines:
- When quoting variables or names from the code, use backticks (`) instead of single quote (').
- If relevant, use bullet points.
- Be short and to the point.

Example Hunk Structure:
======
## File: 'src/file1.py'

@@ -12,5 +12,5 @@ def func1():
code line 1 that remained unchanged in the PR
code line 2 that remained unchanged in the PR
-code line that was removed in the PR
+code line added in the PR
code line 3 that remained unchanged in the PR
======

"""

user="""PR Info:

Title: '{{title}}'

Branch: '{{branch}}'


Here is a context hunk from the PR diff:
======
{{ full_hunk|trim }}
======


Now focus on the selected lines from the hunk:
======
{{ selected_lines|trim }}
======
Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines

{%- if conversation_history %}

Previous discussion on this code:
======
{{ conversation_history|trim }}
======

Consider this conversation history (format: "N. Username: Message", where numbers indicate the comment order). When responding:
- Maintain consistency with previous technical explanations
- Address unresolved issues from earlier discussions
- Build upon existing knowledge without contradictions
- Incorporate relevant context while focusing on the current question
{%- endif %}

A question about the selected lines:
======
{{ question|trim }}
======

Response to the question:
"""


================================================
FILE: pr_agent/settings/pr_questions_prompts.toml
================================================
[pr_questions_prompt]
system="""You are PR-Reviewer, a language model designed to answer questions about a Git Pull Request (PR).

Your goal is to answer questions\\tasks about the new code introduced in the PR (lines starting with '+' in the 'PR Git Diff' section), and provide feedback.
Be informative, constructive, and give examples. Try to be as specific as possible.
Don't avoid answering the questions. You must answer the questions, as best as you can, without adding any unrelated content.
"""

user="""PR Info:

Title: '{{title}}'

Branch: '{{branch}}'

{%- if description %}

Description:
======
{{ description|trim }}
======
{%- endif %}

{%- if language %}

Main PR language: '{{ language }}'
{%- endif %}


The PR Git Diff:
======
{{ diff|trim }}
======
Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines


The PR Questions:
======
{{ questions|trim }}
======

Response to the PR Questions:
"""


================================================
FILE: pr_agent/settings/pr_reviewer_prompts.toml
================================================
[pr_review_prompt]
system="""You are PR-Reviewer, a language model designed to review a Git Pull Request (PR).
Your task is to provide constructive and concise feedback for the PR.
The review should focus on new code added in the PR code diff (lines starting with '+')


The format we will use to present the PR code diff:
======
## File: 'src/file1.py'
{%- if is_ai_metadata %}
### AI-generated changes summary:
* ...
* ...
{%- endif %}


@@ ... @@ def func1():
__new hunk__
11  unchanged code line0
12  unchanged code line1
13 +new code line2 added
14  unchanged code line3
__old hunk__
 unchanged code line0
 unchanged code line1
-old code line2 removed
 unchanged code line3

@@ ... @@ def func2():
__new hunk__
 unchanged code line4
+new code line5 added
 unchanged code line6

## File: 'src/file2.py'
...
======

- In the format above, the diff is organized into separate '__new hunk__' and '__old hunk__' sections for each code chunk. '__new hunk__' contains the updated code, while '__old hunk__' shows the removed code. If no code was removed in a specific chunk, the __old hunk__ section will be omitted.
- We also added line numbers for the '__new hunk__' code, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and should only be used for reference.
- Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \
 The review should address new code added in the PR code diff (lines starting with '+').
{%- if is_ai_metadata %}
- If available, an AI-generated summary will appear and provide a high-level overview of the file changes. Note that this summary may not be fully accurate or complete.
{%- endif %}
- When quoting variables, names or file paths from the code, use backticks (`) instead of single quote (').
- Note that you only see changed code segments (diff hunks in a PR), not the entire codebase. Avoid suggestions that might duplicate existing functionality or questioning code elements (like variables declarations or import statements) that may be defined elsewhere in the codebase.
- Also note that if the code ends at an opening brace or statement that begins a new scope (like 'if', 'for', 'try'), don't treat it as incomplete. Instead, acknowledge the visible scope boundary and analyze only the code shown.

{%- if extra_instructions %}


Extra instructions from the user:
======
{{ extra_instructions }}
======
{% endif %}


The output must be a YAML object equivalent to type $PRReview, according to the following Pydantic definitions:
=====
{%- if require_can_be_split_review %}
class SubPR(BaseModel):
    relevant_files: List[str] = Field(description="The relevant files of the sub-PR")
    title: str = Field(description="Short and concise title for an independent and meaningful sub-PR, composed only from the relevant files")
{%- endif %}

class KeyIssuesComponentLink(BaseModel):
    relevant_file: str = Field(description="The full file path of the relevant file")
    issue_header: str = Field(description="One or two word title for the issue. For example: 'Possible Bug', etc.")
    issue_content: str = Field(description="A short and concise summary of what should be further inspected and validated during the PR review process for this issue. Do not mention line numbers in this field.")
    start_line: int = Field(description="The start line that corresponds to this issue in the relevant file")
    end_line: int = Field(description="The end line that corresponds to this issue in the relevant file")

{%- if require_todo_scan %}
class TodoSection(BaseModel):
    relevant_file: str = Field(description="The full path of the file containing the TODO comment")
    line_number: int = Field(description="The line number where the TODO comment starts")
    content: str = Field(description="The content of the TODO comment. Only include actual TODO comments within code comments (e.g., comments starting with '#', '//', '/*', '<!--', ...).  Remove leading 'TODO' prefixes. If more than 10 words, summarize the TODO comment to a single short sentence up to 10 words.")
{%- endif %}

{%- if related_tickets %}

class TicketCompliance(BaseModel):
    ticket_url: str = Field(description="Ticket URL or ID")
    ticket_requirements: str = Field(description="Repeat, in your own words (in bullet points), all the requirements, sub-tasks, DoD, and acceptance criteria raised by the ticket")
    fully_compliant_requirements: str = Field(description="Bullet-point list of items from the  'ticket_requirements' section above that are fulfilled by the PR code. Don't explain how the requirements are met, just list them shortly. Can be empty")
    not_compliant_requirements: str = Field(description="Bullet-point list of items from the 'ticket_requirements' section above that are not fulfilled by the PR code. Don't explain how the requirements are not met, just list them shortly. Can be empty")
    requires_further_human_verification: str = Field(description="Bullet-point list of items from the 'ticket_requirements' section above that cannot be assessed through code review alone, are unclear, or need further human review (e.g., browser testing, UI checks). Leave empty if all 'ticket_requirements' were marked as fully compliant or not compliant")
{%- endif %}

{%- if require_estimate_contribution_time_cost %}

class ContributionTimeCostEstimate(BaseModel):
    best_case: str = Field(description="An expert in the relevant technology stack, with no unforeseen issues or bugs during the work.", examples=["45m", "5h", "30h"])
    average_case: str = Field(description="A senior developer with only brief familiarity with this specific technology stack, and no major unforeseen issues.", examples=["45m", "5h", "30h"])
    worst_case: str = Field(description="A senior developer with no prior experience in this specific technology stack, requiring significant time for research, debugging, or resolving unexpected errors.", examples=["45m", "5h", "30h"])
{%- endif %}

class Review(BaseModel):
{%- if related_tickets %}
    ticket_compliance_check: List[TicketCompliance] = Field(description="A list of compliance checks for the related tickets")
{%- endif %}
{%- if require_estimate_effort_to_review %}
    estimated_effort_to_review_[1-5]: int = Field(description="Estimate, on a scale of 1-5 (inclusive), the time and effort required to review this PR by an experienced and knowledgeable developer. 1 means short and easy review , 5 means long and hard review. Take into account the size, complexity, quality, and the needed changes of the PR code diff.")
{%- endif %}
{%- if require_estimate_contribution_time_cost %}
    contribution_time_cost_estimate: ContributionTimeCostEstimate = Field(description="An estimate of the time required to implement the changes, based on the quantity, quality, and complexity of the contribution, as well as the context from the PR description and commit messages.")
{%- endif %}
{%- if require_score %}
    score: str = Field(description="Rate this PR on a scale of 0-100 (inclusive), where 0 means the worst possible PR code, and 100 means PR code of the highest quality, without any bugs or performance issues, that is ready to be merged immediately and run in production at scale.")
{%- endif %}
{%- if require_tests %}
    relevant_tests: str = Field(description="yes/no question: does this PR have relevant tests added or updated ?")
{%- endif %}
{%- if question_str %}
    insights_from_user_answers: str = Field(description="shortly summarize the insights you gained from the user's answers to the questions")
{%- endif %}
    key_issues_to_review: List[KeyIssuesComponentLink] = Field(description="A short and diverse list (0-{{ num_max_findings }} issues) of high-priority bugs, problems or performance concerns introduced in the PR code, which the PR reviewer should further focus on and validate during the review process.")
{%- if require_security_review %}
    security_concerns: str = Field(description="Does this PR code introduce vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' (without explaining why) if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...', etc. Explain your answer. Be specific and give examples if possible")
{%- endif %}
{%- if require_todo_scan %}
    todo_sections: Union[List[TodoSection], str] = Field(description="A list of TODO comments found in the PR code. Return 'No' (as a string) if there are no TODO comments in the PR")
{%- endif %}
{%- if require_can_be_split_review %}
    can_be_split: List[SubPR] = Field(min_items=0, max_items=3, description="Can this PR, which contains {{ num_pr_files }} changed files in total, be divided into smaller sub-PRs with distinct tasks that can be reviewed and merged independently, regardless of the order ? Make sure that the sub-PRs are indeed independent, with no code dependencies between them, and that each sub-PR represent a meaningful independent task. Output an empty list if the PR code does not need to be split.")
{%- endif %}

class PRReview(BaseModel):
    review: Review
=====


Example output:
```yaml
review:
{%- if related_tickets %}
  ticket_compliance_check:
    - ticket_url: |
        ...
      ticket_requirements: |
        ...
      fully_compliant_requirements: |
        ...
      not_compliant_requirements: |
        ...
      requires_further_human_verification: |
        ...
{%- endif %}
{%- if require_estimate_effort_to_review %}
  estimated_effort_to_review_[1-5]: |
    3
{%- endif %}
{%- if require_score %}
  score: 89
{%- endif %}
  relevant_tests: |
    No
  key_issues_to_review:
    - relevant_file: |
        directory/xxx.py
      issue_header: |
        Possible Bug
      issue_content: |
        ...
      start_line: 12
      end_line: 14
    - ...
  security_concerns: |
    No
{%- if require_todo_scan %}
  todo_sections: |
    No
{%- endif %} 
{%- if require_can_be_split_review %}
  can_be_split:
  - relevant_files:
    - ...
    - ...
    title: ...
  - ...
{%- endif %}
{%- if require_estimate_contribution_time_cost %}
  contribution_time_cost_estimate:
    best_case: |
      ...
    average_case: |
      ...
    worst_case: |
      ...
{%- endif %}
```

Answer should be a valid YAML, and nothing else. Each YAML output MUST be after a newline, with proper indent, and block scalar indicator ('|')
"""

user="""
{%- if related_tickets %}
--PR Ticket Info--
{%- for ticket in related_tickets %}
=====
Ticket URL: '{{ ticket.ticket_url }}'

Ticket Title: '{{ ticket.title }}'

{%- if ticket.labels %}

Ticket Labels: {{ ticket.labels }}

{%- endif %}
{%- if ticket.body %}

Ticket Description:
#####
{{ ticket.body }}
#####
{%- endif %}

{%- if ticket.requirements is defined and ticket.requirements %}
Ticket Requirements:
#####
{{ ticket.requirements }}
#####
{%- endif %}
=====
{% endfor %}
{%- endif %}


--PR Info--
{%- if date %}

Today's Date: {{date}}
{%- endif %}

Title: '{{title}}'

Branch: '{{branch}}'

{%- if description %}

PR Description:
======
{{ description|trim }}
======
{%- endif %}

{%- if question_str %}

=====
Here are questions to better understand the PR. Use the answers to provide better feedback.

{{ question_str|trim }}

User answers:
'
{{ answer_str|trim }}
'
=====
{%- endif %}


The PR code diff:
======
{{ diff|trim }}
======


{%- if duplicate_prompt_examples %}


Example output:
```yaml
review:
{%- if related_tickets %}
  ticket_compliance_check:
    - ticket_url: |
        ...
      ticket_requirements: |
        ...
      fully_compliant_requirements: |
        ...
      not_compliant_requirements: |
        ...
      requires_further_human_verification: |
        ...
{%- endif %}
{%- if require_estimate_effort_to_review %}
  estimated_effort_to_review_[1-5]: |
    3
{%- endif %}
{%- if require_score %}
  score: 89
{%- endif %}
  relevant_tests: |
    No
  key_issues_to_review:
    - relevant_file: |
        ...
      issue_header: |
        ...
      issue_content: |
        ...
      start_line: ...
      end_line: ...
    - ...
  security_concerns: |
    No
{%- if require_todo_scan %}
  todo_sections: |
    No
{%- endif %}
{%- if require_can_be_split_review %}
  can_be_split:
  - relevant_files:
    - ...
    - ...
    title: ...
  - ...
{%- endif %}
{%- if require_estimate_contribution_time_cost %}
  contribution_time_cost_estimate:
    best_case: |
      ...
    average_case: |
      ...
    worst_case: |
      ...
{%- endif %}
```
(replace '...' with the actual values)
{%- endif %}


Response (should be a valid YAML, and nothing else):
```yaml
"""


================================================
FILE: pr_agent/settings/pr_update_changelog_prompts.toml
================================================
[pr_update_changelog_prompt]
system="""You are a language model called PR-Changelog-Updater.
Your task is to add a brief summary of this PR's changes to the project's CHANGELOG.md file:
- Follow the file's existing format and style conventions like dates, section titles, etc.
- Only add new changes (don't repeat existing entries)
- Be general, and avoid specific details, files, etc. The output should be minimal, no more than 3-4 short lines.
- Write only the new content to be added to CHANGELOG.md, without any introduction or summary. The content should appear as if it's a natural part of the existing file.
{%- if pr_link %}
- If relevant, convert the changelog main header into a clickable link using the PR URL '{{ pr_link }}'. Format: header [*](pr_link)
{%- endif %}


{%- if extra_instructions %}

Extra instructions from the user:
======
{{ extra_instructions|trim }}
======
{%- endif %}
"""

user="""PR Info:

Title: '{{title}}'

Branch: '{{branch}}'

{%- if description %}

Description:
======
{{ description|trim }}
======
{%- endif %}

{%- if language %}

Main PR language: '{{ language }}'
{%- endif %}
{%- if commit_messages_str %}


Commit messages:
======
{{ commit_messages_str|trim }}
======
{%- endif %}


The PR Git Diff:
======
{{ diff|trim }}
======


Current date:
```
{{today}}
```


The current 'CHANGELOG.md' file
======
{{ changelog_file_str }}
======


Response:
```markdown
"""


================================================
FILE: pr_agent/tools/__init__.py
================================================


================================================
FILE: pr_agent/tools/pr_add_docs.py
================================================
import copy
import textwrap
from functools import partial
from typing import Dict

from jinja2 import Environment, StrictUndefined

from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import load_yaml
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import get_git_provider
from pr_agent.git_providers.git_provider import get_main_pr_language
from pr_agent.log import get_logger


class PRAddDocs:
    """Tool that asks the AI model to document the code of a PR.

    The model's answer is parsed as YAML and every entry is published as an inline
    code suggestion on the PR through the git provider.
    """

    def __init__(self, pr_url: str, cli_mode=False, args: list = None,
                 ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
        """
        Args:
            pr_url: URL of the pull request; handed to the git provider.
            cli_mode: stored as ``self.cli_mode``.
            args: accepted for interface compatibility; not used by this class.
            ai_handler: zero-argument callable that builds the AI handler.
        """

        self.git_provider = get_git_provider()(pr_url)
        self.main_language = get_main_pr_language(
            self.git_provider.get_languages(), self.git_provider.get_files()
        )

        self.ai_handler = ai_handler()
        self.ai_handler.main_pr_language = self.main_language

        self.patches_diff = None
        self.prediction = None
        self.cli_mode = cli_mode
        # Variables used to render the system/user prompt templates.
        self.vars = {
            "title": self.git_provider.pr.title,
            "branch": self.git_provider.get_pr_branch(),
            "description": self.git_provider.get_pr_description(),
            "language": self.main_language,
            "diff": "",  # empty diff for initial calculation
            "extra_instructions": get_settings().pr_add_docs.extra_instructions,
            "commit_messages_str": self.git_provider.get_commit_messages(),
            'docs_for_language': get_docs_for_language(self.main_language,
                                                       get_settings().pr_add_docs.docs_style),
        }
        self.token_handler = TokenHandler(self.git_provider.pr,
                                          self.vars,
                                          get_settings().pr_add_docs_prompt.system,
                                          get_settings().pr_add_docs_prompt.user)

    async def run(self):
        """Generate the documentation and, when publishing is enabled, push it inline.

        Any exception is logged and swallowed; nothing is raised to the caller.
        """
        try:
            get_logger().info('Generating code Docs for PR...')
            if get_settings().config.publish_output:
                self.git_provider.publish_comment("Generating Documentation...", is_temporary=True)

            get_logger().info('Preparing PR documentation...')
            await retry_with_fallback_models(self._prepare_prediction)
            data = self._prepare_pr_code_docs()
            if (not data) or ('Code Documentation' not in data):
                get_logger().info('No code documentation found for PR.')
                return

            if get_settings().config.publish_output:
                get_logger().info('Pushing PR documentation...')
                self.git_provider.remove_initial_comment()
                get_logger().info('Pushing inline code documentation...')
                self.push_inline_docs(data)
        except Exception as e:
            get_logger().error(f"Failed to generate code documentation for PR, error: {e}")

    async def _prepare_prediction(self, model: str):
        """Fetch the PR diff for ``model`` and store the model's answer in ``self.prediction``."""
        get_logger().info('Getting PR diff...')

        self.patches_diff = get_pr_diff(self.git_provider,
                                        self.token_handler,
                                        model,
                                        add_line_numbers_to_hunks=True,
                                        disable_extra_lines=False)

        get_logger().info('Getting AI prediction...')
        self.prediction = await self._get_prediction(model)

    async def _get_prediction(self, model: str):
        """Render both prompts with the current diff and return the model's response text."""
        variables = copy.deepcopy(self.vars)
        variables["diff"] = self.patches_diff  # update diff
        # StrictUndefined makes rendering fail on a template variable missing from `variables`.
        environment = Environment(undefined=StrictUndefined)
        system_prompt = environment.from_string(get_settings().pr_add_docs_prompt.system).render(variables)
        user_prompt = environment.from_string(get_settings().pr_add_docs_prompt.user).render(variables)
        if get_settings().config.verbosity_level >= 2:
            get_logger().info(f"\nSystem prompt:\n{system_prompt}")
            get_logger().info(f"\nUser prompt:\n{user_prompt}")
        # The finish reason is returned by the handler but not used here.
        response, finish_reason = await self.ai_handler.chat_completion(
            model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)

        return response

    def _prepare_pr_code_docs(self) -> Dict:
        """Parse ``self.prediction`` as YAML.

        A top-level list is wrapped as ``{'Code Documentation': <list>}`` so callers can
        always look up that key.
        """
        docs = self.prediction.strip()
        data = load_yaml(docs)
        if isinstance(data, list):
            data = {'Code Documentation': data}
        return data

    def push_inline_docs(self, data):
        """Turn each entry of ``data['Code Documentation']`` into a code suggestion and publish them.

        Entries that cannot be parsed are skipped (and logged when verbosity_level >= 2).
        If publishing the whole batch fails, each suggestion is published on its own.
        """
        docs = []

        if not data['Code Documentation']:
            return self.git_provider.publish_comment('No code documentation found to improve this PR.')

        for d in data['Code Documentation']:
            try:
                if get_settings().config.verbosity_level >= 2:
                    get_logger().info(f"add_docs: {d}")
                relevant_file = d['relevant file'].strip()
                relevant_line = int(d['relevant line'])  # absolute position
                documentation = d['documentation']
                doc_placement = d['doc placement'].strip()
                if documentation:
                    new_code_snippet = self.dedent_code(relevant_file, relevant_line, documentation, doc_placement,
                                                        add_original_line=True)

                    body = "**Suggestion:** Proposed documentation\n```suggestion\n" + new_code_snippet + "\n```"
                    docs.append({'body': body, 'relevant_file': relevant_file,
                                             'relevant_lines_start': relevant_line,
                                             'relevant_lines_end': relevant_line})
            except Exception:
                if get_settings().config.verbosity_level >= 2:
                    get_logger().info(f"Could not parse code docs: {d}")

        is_successful = self.git_provider.publish_code_suggestions(docs)
        if not is_successful:
            get_logger().info("Failed to publish code docs, trying to publish each docs separately")
            for doc_suggestion in docs:
                self.git_provider.publish_code_suggestions([doc_suggestion])

    def dedent_code(self, relevant_file, relevant_lines_start, new_code_snippet, doc_placement='after',
                    add_original_line=False):
        """Indent the documentation snippet to match the target file and optionally attach the original line.

        Args:
            relevant_file: filename to look up among the provider's diff files.
            relevant_lines_start: 1-based line number in the head version of the file.
            new_code_snippet: documentation text proposed by the model.
            doc_placement: 'after' puts the documentation after the original line and takes
                its indentation from the following line; any other value puts it before the
                original line and uses that line's indentation.
            add_original_line: when True, the original line is included in the result.

        Returns:
            The adjusted snippet. On any error the snippet is returned as it was at the point
            of failure (the error is logged when verbosity_level >= 2).
        """
        try:  # dedent code snippet
            self.diff_files = self.git_provider.diff_files if self.git_provider.diff_files \
                else self.git_provider.get_diff_files()
            original_initial_line = None
            head_lines = []
            for file in self.diff_files:
                if file.filename.strip() == relevant_file:
                    # Split once and reuse for both the original and the following line.
                    head_lines = file.head_file.splitlines()
                    original_initial_line = head_lines[relevant_lines_start - 1]
                    break
            if original_initial_line:
                if doc_placement == 'after' and relevant_lines_start < len(head_lines):
                    line = head_lines[relevant_lines_start]
                else:
                    # Also reached when 'after' targets the last line of the file: there is no
                    # following line, so fall back to the original line's indentation instead of
                    # failing and returning a snippet that lacks the original line.
                    line = original_initial_line
                suggested_initial_line = new_code_snippet.splitlines()[0]
                original_initial_spaces = len(line) - len(line.lstrip())
                suggested_initial_spaces = len(suggested_initial_line) - len(suggested_initial_line.lstrip())
                delta_spaces = original_initial_spaces - suggested_initial_spaces
                # Indentation is only ever added; a snippet indented deeper than the file is left as is.
                if delta_spaces > 0:
                    new_code_snippet = textwrap.indent(new_code_snippet, delta_spaces * " ").rstrip('\n')
                if add_original_line:
                    if doc_placement == 'after':
                        new_code_snippet = original_initial_line + "\n" + new_code_snippet
                    else:
                        new_code_snippet = new_code_snippet.rstrip() + "\n" + original_initial_line
        except Exception as e:
            if get_settings().config.verbosity_level >= 2:
                get_logger().info(f"Could not dedent code snippet for file {relevant_file}, error: {e}")

        return new_code_snippet


def get_docs_for_language(language, style):
    """Return the name of the documentation convention to request for ``language``.

    The lookup is case-insensitive. Languages that use docstrings get the requested
    ``style`` embedded in the name; unknown languages map to the generic "Docs".
    """
    normalized = language.lower()

    # Docstring-based languages are the only ones whose label depends on `style`.
    if normalized in ('python', 'lisp', 'clojure'):
        return f"Docstring ({style})"

    fixed_labels = {
        'java': "Javadocs",
        'javascript': "JSdocs",
        'typescript': "JSdocs",
        'c++': "Doxygen",
    }
    return fixed_labels.get(normalized, "Docs")


================================================
FILE: pr_agent/tools/pr_code_suggestions.py
================================================
import asyncio
import copy
import difflib
import re
import textwrap
import traceback
from datetime import datetime
from functools import partial
from typing import Dict, List

from jinja2 import Environment, StrictUndefined

from pr_agent.algo import MAX_TOKENS
from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
from pr_agent.algo.git_patch_processing import decouple_and_convert_to_hunks_with_lines_numbers
from pr_agent.algo.pr_processing import (add_ai_metadata_to_diff_files,
                                         get_pr_diff, get_pr_multi_diffs,
                                         retry_with_fallback_models)
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import (ModelType, load_yaml, replace_code_tags,
                                 show_relevant_configurations, get_max_tokens, clip_tokens, get_model)
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import (AzureDevopsProvider, GithubProvider,
                                    GitLabProvider, get_git_provider,
                                    get_git_provider_with_context)
from pr_agent.git_providers.git_provider import get_main_pr_language, GitProvider
from pr_agent.log import get_logger
from pr_agent.servers.help import HelpMessage
from pr_agent.tools.pr_description import insert_br_after_x_chars


class PRCodeSuggestions:
    def __init__(self, pr_url: str, cli_mode=False, args: list = None,
                 ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
        """Set up the git provider, AI handler, prompt variables and token handler for a PR.

        Args:
            pr_url: URL of the pull request.
            cli_mode: stored as ``self.cli_mode``.
            args: accepted but not read in this constructor.
            ai_handler: zero-argument callable that builds the AI handler.
        """
        provider = get_git_provider_with_context(pr_url)
        self.git_provider = provider
        self.main_language = get_main_pr_language(provider.get_languages(), provider.get_files())

        suggestions_per_chunk = int(get_settings().pr_code_suggestions.num_code_suggestions_per_chunk)

        handler = ai_handler()
        handler.main_pr_language = self.main_language
        self.ai_handler = handler
        self.patches_diff = None
        self.prediction = None
        self.pr_url = pr_url
        self.cli_mode = cli_mode

        description, description_files = provider.get_pr_description(split_changes_walkthrough=True)
        self.pr_description = description
        self.pr_description_files = description_files

        # AI metadata is used only when the description carries per-file data, the command
        # is an automatic one, and the feature is enabled; otherwise it is switched off in
        # the settings so the value read into `self.vars` below is False.
        metadata_applicable = (description_files
                               and get_settings().get("config.is_auto_command", False)
                               and get_settings().get("config.enable_ai_metadata", False))
        if not metadata_applicable:
            get_settings().set("config.enable_ai_metadata", False)
            get_logger().debug(f"AI metadata is disabled for this command")
        else:
            add_ai_metadata_to_diff_files(self.git_provider, self.pr_description_files)
            get_logger().debug(f"AI metadata added to the this command")

        # Variables used to render the prompt templates.
        self.vars = {
            "title": self.git_provider.pr.title,
            "branch": self.git_provider.get_pr_branch(),
            "description": self.pr_description,
            "language": self.main_language,
            "diff": "",  # placeholder, no diff yet at construction time
            "diff_no_line_numbers": "",  # placeholder, no diff yet at construction time
            "num_code_suggestions": suggestions_per_chunk,
            "extra_instructions": get_settings().pr_code_suggestions.extra_instructions,
            "commit_messages_str": self.git_provider.get_commit_messages(),
            "relevant_best_practices": "",
            "is_ai_metadata": get_settings().get("config.enable_ai_metadata", False),
            "focus_only_on_problems": get_settings().get("pr_code_suggestions.focus_only_on_problems", False),
            "date": datetime.now().strftime('%Y-%m-%d'),
            'duplicate_prompt_examples': get_settings().config.get('duplicate_prompt_examples', False),
        }

        # Pick the prompt pair that matches the hunk presentation mode.
        if get_settings().pr_code_suggestions.get("decouple_hunks", True):
            system_prompt = get_settings().pr_code_suggestions_prompt.system
            user_prompt = get_settings().pr_code_suggestions_prompt.user
        else:
            system_prompt = get_settings().pr_code_suggestions_prompt_not_decoupled.system
            user_prompt = get_settings().pr_code_suggestions_prompt_not_decoupled.user
        self.pr_code_suggestions_prompt_system = system_prompt
        self.pr_code_suggestions_prompt_user = user_prompt

        self.token_handler = TokenHandler(self.git_provider.pr,
                                          self.vars,
                                          self.pr_code_suggestions_prompt_system,
                                          self.pr_code_suggestions_prompt_user)

        # Body of the "work in progress" comment, and the handle of that comment once published.
        progress_header = f"## Generating PR code suggestions\n\n"
        progress_body = f"""\nWork in progress ...<br>\n<img src="https://codium.ai/images/pr_agent/dual_ball_loading-crop.gif" width=48>"""
        self.progress = progress_header + progress_body
        self.progress_response = None

    async def run(self):
        """Generate code suggestions for the PR and publish them.

        Flow:
          1. Return early (None) when the PR has no files.
          2. Optionally publish a progress / temporary comment.
          3. Obtain the suggestions through ``retry_with_fallback_models``.
          4. With no suggestions, delegate to ``publish_no_suggestions``.
          5. Otherwise publish either a summarized comment (optionally followed by dual
             publishing of inline suggestions) or inline suggestions only; when
             ``publish_output`` is off, store the summarized body in the settings instead.

        Exceptions are logged and not re-raised; when publishing is enabled the progress
        comment is removed or a failure comment is published.
        """
        try:
            if not self.git_provider.get_files():
                get_logger().info(f"PR has no files: {self.pr_url}, skipping code suggestions")
                return None

            get_logger().info('Generating code suggestions for PR...')
            relevant_configs = {'pr_code_suggestions': dict(get_settings().pr_code_suggestions),
                                'config': dict(get_settings().config)}
            get_logger().debug("Relevant configs", artifacts=relevant_configs)

            # publish "Preparing suggestions..." comments
            # (skipped for automatic commands)
            if (get_settings().config.publish_output and get_settings().config.publish_output_progress and
                    not get_settings().config.get('is_auto_command', False)):
                if self.git_provider.is_supported("gfm_markdown"):
                    self.progress_response = self.git_provider.publish_comment(self.progress)
                else:
                    self.git_provider.publish_comment("Preparing suggestions...", is_temporary=True)

            # # call the model to get the suggestions, and self-reflect on them
            # if not self.is_extended:
            #     data = await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.REGULAR)
            # else:
            data = await retry_with_fallback_models(self.prepare_prediction_main, model_type=ModelType.REGULAR)
            # Normalize a falsy result to an empty suggestions list.
            if not data:
                data = {"code_suggestions": []}
            self.data = data

            # Handle the case where the PR has no suggestions
            if (data is None or 'code_suggestions' not in data or not data['code_suggestions']):
                await self.publish_no_suggestions()
                return

            # publish the suggestions
            if get_settings().config.publish_output:
                # If a temporary comment was published, remove it
                self.git_provider.remove_initial_comment()

                # Publish table summarized suggestions
                if ((not get_settings().pr_code_suggestions.commitable_code_suggestions) and
                        self.git_provider.is_supported("gfm_markdown")):

                    # generate summarized suggestions
                    pr_body = self.generate_summarized_suggestions(data)
                    get_logger().debug(f"PR output", artifact=pr_body)

                    # require self-review
                    if get_settings().pr_code_suggestions.demand_code_suggestions_self_review:
                        pr_body = await self.add_self_review_text(pr_body)

                    # add usage guide
                    # (the chat hint is added only for automatic commands on GitHub)
                    if (get_settings().pr_code_suggestions.enable_chat_text and get_settings().config.is_auto_command
                            and isinstance(self.git_provider, GithubProvider)):
                        pr_body += "\n\n>💡 Need additional feedback ? start a [PR chat](https://chromewebstore.google.com/detail/ephlnjeghhogofkifjloamocljapahnl) \n\n"
                    if get_settings().pr_code_suggestions.enable_help_text:
                        pr_body += "<hr>\n\n<details> <summary><strong>💡 Tool usage guide:</strong></summary><hr> \n\n"
                        pr_body += HelpMessage.get_improve_usage_guide()
                        pr_body += "\n</details>\n"

                    # Output the relevant configurations if enabled
                    if get_settings().get('config', {}).get('output_relevant_configurations', False):
                        pr_body += show_relevant_configurations(relevant_section='pr_code_suggestions')

                    # publish the PR comment
                    if get_settings().pr_code_suggestions.persistent_comment: # true by default
                        self.publish_persistent_comment_with_history(self.git_provider,
                                                                     pr_body,
                                                                     initial_header="## PR Code Suggestions ✨",
                                                                     update_header=True,
                                                                     name="suggestions",
                                                                     final_update_message=False,
                                                                     max_previous_comments=get_settings().pr_code_suggestions.max_history_len,
                                                                     progress_response=self.progress_response)
                    else:
                        # Reuse the progress comment when one exists, otherwise post a new comment.
                        if self.progress_response:
                            self.git_provider.edit_comment(self.progress_response, body=pr_body)
                        else:
                            self.git_provider.publish_comment(pr_body)

                    # dual publishing mode
                    # (a threshold of 0 or less disables it)
                    if int(get_settings().pr_code_suggestions.dual_publishing_score_threshold) > 0:
                        await self.dual_publishing(data)
                else:
                    # Inline (committable) suggestions only; the progress comment is no longer needed.
                    await self.push_inline_code_suggestions(data)
                    if self.progress_response:
                        self.git_provider.remove_comment(self.progress_response)
            else:
                get_logger().info('Code suggestions generated for PR, but not published since publish_output is False.')
                # Store the rendered body in the settings object instead of publishing it.
                pr_body = self.generate_summarized_suggestions(data)
                get_settings().data = {"artifact": pr_body}
                return
        except Exception as e:
            get_logger().error(f"Failed to generate code suggestions for PR, error: {e}",
                               artifact={"traceback": traceback.format_exc()})
            if get_settings().config.publish_output:
                if self.progress_response:
                    self.git_provider.remove_comment(self.progress_response)
                else:
                    try:
                        self.git_provider.remove_initial_comment()
                        self.git_provider.publish_comment(f"Failed to generate code suggestions for PR")
                    except Exception as e:
                        get_logger().exception(f"Failed to update persistent review, error: {e}")

    async def add_self_review_text(self, pr_body):
        """Append the self-review checkbox and its hidden HTML marker to ``pr_body``.

        The marker names what the configuration requests on self-review: approve only,
        fold only, or approve and fold. The last marker is used both when the two options
        are enabled and when neither is.

        Returns:
            The extended comment body.
        """
        checkbox_text = get_settings().pr_code_suggestions.code_suggestions_self_review_text
        approve_enabled = get_settings().pr_code_suggestions.approve_pr_on_self_review
        fold_enabled = get_settings().pr_code_suggestions.fold_suggestions_on_self_review

        # Only the two "exactly one option" combinations get a dedicated marker.
        exclusive_markers = {
            (True, False): ' <!-- approve pr self-review -->',
            (False, True): ' <!-- fold suggestions self-review -->',
        }
        marker = exclusive_markers.get((bool(approve_enabled), bool(fold_enabled)),
                                       ' <!-- approve and fold suggestions self-review -->')

        pr_body += f"\n\n- [ ]  {checkbox_text}"
        pr_body += marker
        return pr_body

    async def publish_no_suggestions(self):
        """Report that no code suggestions were found.

        When publishing is enabled (``config.publish_output`` and
        ``pr_code_suggestions.publish_output_no_suggestions``, the latter defaulting to
        True), the message replaces the progress comment if one exists, or is posted as a
        new comment. Otherwise an empty artifact is stored on the settings object.
        """
        empty_result_body = "## PR Code Suggestions ✨\n\nNo code suggestions found for the PR."

        publishing_enabled = (get_settings().config.publish_output and
                              get_settings().pr_code_suggestions.get('publish_output_no_suggestions', True))
        if not publishing_enabled:
            get_settings().data = {"artifact": ""}
            return

        get_logger().warning('No code suggestions found for the PR.')
        get_logger().debug(f"PR output", artifact=empty_result_body)
        if not self.progress_response:
            self.git_provider.publish_comment(empty_result_body)
        else:
            self.git_provider.edit_comment(self.progress_response, body=empty_result_body)

    async def dual_publishing(self, data):
        """Publish, as inline suggestions, the entries of ``data['code_suggestions']`` that
        have an ``improved_code`` value and a score at or above
        ``pr_code_suggestions.dual_publishing_score_threshold``.

        A selected suggestion whose ``existing_code`` is empty gets it filled in (in place)
        with its ``improved_code``. Any exception is logged and not re-raised.
        """
        selected = []
        try:
            for candidate in data['code_suggestions']:
                score = int(candidate.get('score', 0))
                threshold = int(get_settings().pr_code_suggestions.dual_publishing_score_threshold)
                if score < threshold or not candidate.get('improved_code'):
                    continue

                selected.append(candidate)
                if not candidate['existing_code']:
                    get_logger().info(f'Identical existing and improved code for dual publishing found')
                    candidate['existing_code'] = candidate['improved_code']

            if selected:
                get_logger().info(
                    f"Publishing {len(selected)} suggestions in dual publishing mode")
                await self.push_inline_code_suggestions({'code_suggestions': selected})
        except Exception as e:
            get_logger().error(f"Failed to publish dual publishing suggestions, error: {e}")

    @staticmethod
    def publish_persistent_comment_with_history(git_provider: GitProvider,
                                                pr_comment: str,
                                                initial_header: str,
                                                update_header: bool = True,
                                                name='review',
                                                final_update_message=True,
                                                max_previous_comments=4,
                                                progress_response=None,
                                                only_fold=False):
        """
        Publish `pr_comment` as a persistent comment that keeps a folded history of earlier results.

        The issue comments are scanned for one whose body starts with `initial_header`. When such a
        comment is found, its current table is wrapped in a <details> block and moved into a
        "Previous suggestions" section, and the new content is placed on top of it. When no such
        comment is found (or the lookup fails), a fresh comment is published.

        Args:
            git_provider: provider used to list, edit, remove and publish comments.
            pr_comment: the new comment text; `initial_header` is stripped from it before re-assembly.
            initial_header: header that identifies the persistent comment.
            update_header: not read by this function; kept for interface compatibility.
            name: label (capitalized) used in the <summary> of every history entry.
            final_update_message: not read by this function; kept for interface compatibility.
            max_previous_comments: history entries to keep. A value of 0 or less skips the lookup of a
                previous comment altogether.
            progress_response: optional comment object. When given, the final text is written into it,
                and a previously found persistent comment is removed.
            only_fold: when True, the self-review checkbox text is used as the header of the latest
                section instead of "Latest suggestions up to <commit>".

        Returns:
            The comment object that holds the published content.
        """

        def _extract_link(comment_text: str):
            # the commit identifier is stored inside the first HTML comment: <!-- abc1234 -->
            r = re.compile(r"<!--.*?-->")
            match = r.search(comment_text)

            up_to_commit_txt = ""
            if match:
                up_to_commit_txt = f" up to commit {match.group(0)[4:-3].strip()}"
            return up_to_commit_txt

        history_header = f"#### Previous suggestions\n"
        last_commit_num = git_provider.get_latest_commit_url().split('/')[-1][:7]
        if only_fold: # A user clicked on the 'self-review' checkbox
            text = get_settings().pr_code_suggestions.code_suggestions_self_review_text
            latest_suggestion_header = f"\n\n- [x]  {text}"
        else:
            latest_suggestion_header = f"Latest suggestions up to {last_commit_num}"
        latest_commit_html_comment = f"<!-- {last_commit_num} -->"

        if max_previous_comments > 0:
            try:
                prev_comments = list(git_provider.get_issue_comments())
                for comment in prev_comments:
                    if comment.body.startswith(initial_header):
                        prev_suggestions = comment.body
                        comment_url = git_provider.get_comment_url(comment)

                        if history_header.strip() not in comment.body:
                            # no history section
                            # extract everything between <table> and </table> in comment.body including <table> and </table>
                            table_index = comment.body.find("<table>")
                            if table_index == -1:
                                # NOTE(review): the comment is overwritten and the scan continues; if no other
                                # comment matches, a new comment is published below as well - confirm intent
                                git_provider.edit_comment(comment, pr_comment)
                                continue
                            # find http link from comment.body[:table_index]
                            up_to_commit_txt = _extract_link(comment.body[:table_index])
                            prev_suggestion_table = comment.body[
                                                    table_index:comment.body.rfind("</table>") + len("</table>")]

                            tick = "✅ " if "✅" in prev_suggestion_table else ""
                            # surround with details tag
                            prev_suggestion_table = f"<details><summary>{tick}{name.capitalize()}{up_to_commit_txt}</summary>\n<br>{prev_suggestion_table}\n\n</details>"

                            new_suggestion_table = pr_comment.replace(initial_header, "").strip()

                            pr_comment_updated = f"{initial_header}\n{latest_commit_html_comment}\n\n"
                            pr_comment_updated += f"{latest_suggestion_header}\n{new_suggestion_table}\n\n___\n\n"
                            pr_comment_updated += f"{history_header}{prev_suggestion_table}\n"
                        else:
                            # get the text of the previous suggestions until the latest commit
                            sections = prev_suggestions.split(history_header.strip())
                            latest_table = sections[0].strip()
                            prev_suggestion_table = sections[1].replace(history_header, "").strip()

                            # get text after the latest_suggestion_header in comment.body
                            table_ind = latest_table.find("<table>")
                            up_to_commit_txt = _extract_link(latest_table[:table_ind])

                            latest_table = latest_table[table_ind:latest_table.rfind("</table>") + len("</table>")]
                            # enforce max_previous_comments
                            count = prev_suggestions.count(f"\n<details><summary>{name.capitalize()}")
                            count += prev_suggestions.count(f"\n<details><summary>✅ {name.capitalize()}")
                            if count >= max_previous_comments:
                                # remove the oldest suggestion. New entries are prepended, so the oldest one is
                                # the last entry in the section; its summary may or may not carry the "✅ " tick
                                oldest_entry_index = max(
                                    prev_suggestion_table.rfind(
                                        f"<details><summary>{name.capitalize()} up to commit"),
                                    prev_suggestion_table.rfind(
                                        f"<details><summary>✅ {name.capitalize()} up to commit"))
                                if oldest_entry_index >= 0:
                                    prev_suggestion_table = prev_suggestion_table[:oldest_entry_index]
                                else:
                                    # nothing recognizable to drop; slicing with -1 would cut the last character
                                    get_logger().debug("Could not locate the oldest history entry, keeping history as is")

                            tick = "✅ " if "✅" in latest_table else ""
                            # Add to the prev_suggestions section
                            last_prev_table = f"\n<details><summary>{tick}{name.capitalize()}{up_to_commit_txt}</summary>\n<br>{latest_table}\n\n</details>"
                            prev_suggestion_table = last_prev_table + "\n" + prev_suggestion_table

                            new_suggestion_table = pr_comment.replace(initial_header, "").strip()

                            pr_comment_updated = f"{initial_header}\n"
                            pr_comment_updated += f"{latest_commit_html_comment}\n\n"
                            pr_comment_updated += f"{latest_suggestion_header}\n\n{new_suggestion_table}\n\n"
                            pr_comment_updated += "___\n\n"
                            pr_comment_updated += f"{history_header}\n"
                            pr_comment_updated += f"{prev_suggestion_table}\n"

                        get_logger().info(f"Persistent mode - updating comment {comment_url} to latest {name} message")
                        if progress_response:  # publish to 'progress_response' comment, because it refreshes immediately
                            git_provider.edit_comment(progress_response, pr_comment_updated)
                            git_provider.remove_comment(comment)
                            comment = progress_response
                        else:
                            git_provider.edit_comment(comment, pr_comment_updated)
                        return comment
            except Exception as e:
                # best effort: on any failure fall through and publish a regular comment below
                get_logger().exception(f"Failed to update persistent review, error: {e}")

        # if we are here, we did not find a previous comment to update
        body = pr_comment.replace(initial_header, "").strip()
        pr_comment = f"{initial_header}\n\n{latest_commit_html_comment}\n\n{body}\n\n"
        if progress_response:
            git_provider.edit_comment(progress_response, pr_comment)
            new_comment = progress_response
        else:
            new_comment = git_provider.publish_comment(pr_comment)
        return new_comment


    def extract_link(self, s):
        """
        Build the " up to commit <text>" suffix from the first HTML comment found in `s`.

        Args:
            s: text that may contain an HTML comment of the form `<!-- ... -->`.

        Returns:
            " up to commit " followed by the stripped content of the first HTML comment,
            or an empty string when `s` contains no HTML comment.
        """
        marker = re.search(r"<!--(.*?)-->", s)
        if marker is None:
            return ""
        return f" up to commit {marker.group(1).strip()}"

    async def _prepare_prediction(self, model: str) -> dict:
        """
        Fetch the PR diff (with line numbers in the hunks) and ask the model for suggestions on it.

        The diff is stored on the instance both with line numbers (`patches_diff`, `patches_diff_list`)
        and without them (`patches_diff_no_line_number`).

        Args:
            model: name of the model used for the diff preparation and for the prediction.

        Returns:
            The prediction returned by `_get_prediction`, or None when the diff is empty.
            The same value is stored in `self.prediction`.
        """
        self.patches_diff = get_pr_diff(self.git_provider,
                                        self.token_handler,
                                        model,
                                        add_line_numbers_to_hunks=True,
                                        disable_extra_lines=False)
        self.patches_diff_list = [self.patches_diff]
        stripped_diffs = self.remove_line_numbers([self.patches_diff])
        self.patches_diff_no_line_number = stripped_diffs[0]

        if not self.patches_diff:
            # nothing to analyze
            get_logger().warning(f"Empty PR diff")
            self.prediction = None
            return self.prediction

        get_logger().debug(f"PR diff", artifact=self.patches_diff)
        self.prediction = await self._get_prediction(model, self.patches_diff, self.patches_diff_no_line_number)
        return self.prediction

    async def _get_prediction(self, model: str, patches_diff: str, patches_diff_no_line_number: str) -> dict:
        """
        Run one suggestions call on a diff chunk, followed by a self-reflection call that scores the result.

        Args:
            model: model used to generate the suggestions.
            patches_diff: diff chunk, rendered into the prompts as `diff`.
            patches_diff_no_line_number: the same chunk rendered as `diff_no_line_numbers`.

        Returns:
            The parsed suggestions dict. When the self-reflection call returns nothing,
            every suggestion gets the default score 7 and an empty `score_why`.
        """
        prompt_vars = copy.deepcopy(self.vars)
        prompt_vars["diff"] = patches_diff  # update diff
        prompt_vars["diff_no_line_numbers"] = patches_diff_no_line_number  # update diff

        jinja_env = Environment(undefined=StrictUndefined)
        system_prompt = jinja_env.from_string(self.pr_code_suggestions_prompt_system).render(prompt_vars)
        user_prompt = jinja_env.from_string(get_settings().pr_code_suggestions_prompt.user).render(prompt_vars)
        response, _finish_reason = await self.ai_handler.chat_completion(
            model=model, temperature=get_settings().config.temperature, system=system_prompt, user=user_prompt)
        if not get_settings().config.publish_output:
            # keep the rendered prompts on the settings object when the output is not published
            get_settings().system_prompt = system_prompt
            get_settings().user_prompt = user_prompt

        # load suggestions from the AI response
        data = self._prepare_pr_code_suggestions(response)

        # self-reflect on suggestions (mandatory, since line numbers are generated now here)
        reflect_model = get_model('model_reasoning')
        fallbacks = get_settings().config.fallback_models
        if reflect_model == get_settings().config.model and model != get_settings().config.model:
            if fallbacks and model == fallbacks[0]:
                # we are using a fallback model (should not happen on regular conditions)
                get_logger().warning(f"Using the same model for self-reflection as the one used for suggestions")
                reflect_model = model

        response_reflect = await self.self_reflect_on_suggestions(data["code_suggestions"],
                                                                  patches_diff, model=reflect_model)
        if response_reflect:
            await self.analyze_self_reflection_response(data, response_reflect)
            return data

        # no reflection output: fall back to the default score
        for suggestion in data["code_suggestions"]:
            suggestion["score"] = 7
            suggestion["score_why"] = ""
        return data

    async def analyze_self_reflection_response(self, data, response_reflect):
        """
        Merge the self-reflection feedback into the suggestions of `data` (in place).

        The feedback is applied only when the reflection response contains a non-empty
        `code_suggestions` list with exactly one entry per suggestion in `data`; otherwise
        the suggestions are left untouched.

        Args:
            data: dict with a `code_suggestions` list of suggestion dicts; modified in place.
            response_reflect: raw self-reflection response text, parsed with `load_yaml`.
        """
        response_reflect_yaml = load_yaml(response_reflect)
        code_suggestions_feedback = response_reflect_yaml.get("code_suggestions", [])
        # feedback is matched to suggestions by position, so the two lists must have the same length
        if code_suggestions_feedback and len(code_suggestions_feedback) == len(data["code_suggestions"]):
            for i, suggestion in enumerate(data["code_suggestions"]):
                try:
                    suggestion["score"] = code_suggestions_feedback[i]["suggestion_score"]
                    suggestion["score_why"] = code_suggestions_feedback[i]["why"]

                    # take the line range from the feedback only when the suggestion does not have one yet
                    if 'relevant_lines_start' not in suggestion:
                        relevant_lines_start = code_suggestions_feedback[i].get('relevant_lines_start', -1)
                        relevant_lines_end = code_suggestions_feedback[i].get('relevant_lines_end', -1)
                        suggestion['relevant_lines_start'] = relevant_lines_start
                        suggestion['relevant_lines_end'] = relevant_lines_end
                        # a missing (or negative) line range zeroes the score of the suggestion
                        if relevant_lines_start < 0 or relevant_lines_end < 0:
                            suggestion["score"] = 0

                    # statistics logging is best effort; a failure here must not affect the suggestion itself
                    try:
                        if get_settings().config.publish_output:
                            if not suggestion["score"]:
                                score = -1
                            else:
                                score = int(suggestion["score"])
                            label = suggestion["label"].lower().strip()
                            label = label.replace('<br>', ' ')
                            suggestion_statistics_dict = {'score': score,
                                                          'label': label}
                            get_logger().info(f"PR-Agent suggestions statistics",
                                              statistics=suggestion_statistics_dict, analytics=True)
                    except Exception as e:
                        get_logger().error(f"Failed to log suggestion statistics, error: {e}")
                        pass

                except Exception as e:  # any failure while reading the feedback falls back to the default score
                    get_logger().error(f"Error processing suggestion score {i}",
                                       artifact={"suggestion": suggestion,
                                                 "code_suggestions_feedback": code_suggestions_feedback[i]})
                    suggestion["score"] = 7
                    suggestion["score_why"] = ""

                # may set the score to 0 (see validate_one_liner_suggestion_not_repeating_code)
                suggestion = self.validate_one_liner_suggestion_not_repeating_code(suggestion)

                # if the before and after code is the same, clear one of them
                try:
                    if suggestion['existing_code'] == suggestion['improved_code']:
                        get_logger().debug(
                            f"edited improved suggestion {i + 1}, because equal to existing code: {suggestion['existing_code']}")
                        if get_settings().pr_code_suggestions.commitable_code_suggestions:
                            suggestion['improved_code'] = ""  # we need 'existing_code' to locate the code in the PR
                        else:
                            suggestion['existing_code'] = ""
                except Exception as e:
                    get_logger().error(f"Error processing suggestion {i + 1}, error: {e}")

    @staticmethod
    def _truncate_if_needed(suggestion):
        """
        Cut `suggestion['improved_code']` down to the configured maximal length, if one is configured.

        The limit is read from `PR_CODE_SUGGESTIONS.MAX_CODE_SUGGESTION_LENGTH` (default 0, meaning
        no truncation). When the code is cut, a new line with the text of
        `PR_CODE_SUGGESTIONS.SUGGESTION_TRUNCATION_MESSAGE` is appended to it.

        Args:
            suggestion: suggestion dict with an `improved_code` entry; modified in place.

        Returns:
            The same suggestion dict.
        """
        length_limit = get_settings().get("PR_CODE_SUGGESTIONS.MAX_CODE_SUGGESTION_LENGTH", 0)
        truncation_note = get_settings().get("PR_CODE_SUGGESTIONS.SUGGESTION_TRUNCATION_MESSAGE", "")
        if not length_limit > 0:
            return suggestion

        current_length = len(suggestion['improved_code'])
        if current_length > length_limit:
            get_logger().info(f"Truncated suggestion from {current_length} "
                              f"characters to {length_limit} characters")
            clipped_code = suggestion['improved_code'][:length_limit]
            suggestion['improved_code'] = clipped_code + f"\n{truncation_note}"
        return suggestion

    def _prepare_pr_code_suggestions(self, predictions: str) -> Dict:
        """
        Parse the raw model response and keep only the suggestions that are usable.

        A suggestion is dropped when it lacks one of 'one_sentence_summary', 'label' or
        'relevant_file', when its summary repeats an already kept suggestion, when its content
        mentions 'const', 'instead' and 'let' together, or when it lacks 'existing_code' or
        'improved_code'. Kept suggestions go through `_truncate_if_needed`.

        Args:
            predictions: raw YAML text returned by the model.

        Returns:
            The parsed dict, with `code_suggestions` replaced by the filtered list.
        """
        data = load_yaml(predictions.strip(),
                         keys_fix_yaml=["relevant_file", "suggestion_content", "existing_code", "improved_code"],
                         first_key="code_suggestions", last_key="label")
        if isinstance(data, list):
            # a bare list of suggestions is wrapped into the expected dict form
            data = {'code_suggestions': data}

        # remove or edit invalid suggestions
        kept_suggestions = []
        seen_summaries = []
        for idx, suggestion in enumerate(data['code_suggestions']):
            try:
                required_keys = ('one_sentence_summary', 'label', 'relevant_file')
                missing_key = next((key for key in required_keys if key not in suggestion), None)
                if missing_key is not None:
                    get_logger().debug(
                        f"Skipping suggestion {idx + 1}, because it does not contain '{missing_key}':\n'{suggestion}")
                    continue

                if get_settings().get("pr_code_suggestions.focus_only_on_problems", False):
                    # we want the published labels to be less declarative
                    if 'critical' in suggestion['label'].lower():
                        suggestion['label'] = 'possible issue'

                if suggestion['one_sentence_summary'] in seen_summaries:
                    get_logger().debug(f"Skipping suggestion {idx + 1}, because it is a duplicate: {suggestion}")
                    continue

                content = suggestion['suggestion_content']
                if 'const' in content and 'instead' in content and 'let' in content:
                    get_logger().debug(
                        f"Skipping suggestion {idx + 1}, because it uses 'const instead let': {suggestion}")
                    continue

                if 'existing_code' not in suggestion or 'improved_code' not in suggestion:
                    get_logger().info(
                        f"Skipping suggestion {idx + 1}, because it does not contain 'existing_code' or 'improved_code': {suggestion}")
                    continue

                suggestion = self._truncate_if_needed(suggestion)
                seen_summaries.append(suggestion['one_sentence_summary'])
                kept_suggestions.append(suggestion)
            except Exception as e:
                get_logger().error(f"Error processing suggestion {idx + 1}: {suggestion}, error: {e}")
        data['code_suggestions'] = kept_suggestions

        return data

    async def push_inline_code_suggestions(self, data):
        """
        Publish the suggestions of `data` as inline code suggestions through the git provider.

        When there are no suggestions, a "No suggestions found" message is written instead (into the
        progress comment if one exists, otherwise as a new comment) and the provider's result is returned.
        If publishing all suggestions at once is reported as unsuccessful, each one is published separately.

        Args:
            data: dict with a `code_suggestions` list of suggestion dicts.
        """
        if not data['code_suggestions']:
            get_logger().info('No suggestions found to improve this PR.')
            if self.progress_response:
                return self.git_provider.edit_comment(self.progress_response,
                                                      body='No suggestions found to improve this PR.')
            return self.git_provider.publish_comment('No suggestions found to improve this PR.')

        inline_suggestions = []
        for d in data['code_suggestions']:
            try:
                if get_settings().config.verbosity_level >= 2:
                    get_logger().info(f"suggestion: {d}")
                relevant_file = d['relevant_file'].strip()
                relevant_lines_start = int(d['relevant_lines_start'])  # absolute position
                relevant_lines_end = int(d['relevant_lines_end'])
                content = d['suggestion_content'].rstrip()
                new_code_snippet = d['improved_code'].rstrip()
                label = d['label'].strip()

                if new_code_snippet:
                    new_code_snippet = self.dedent_code(relevant_file, relevant_lines_start, new_code_snippet)

                # the importance is shown only when the suggestion carries a truthy score
                score = d.get('score')
                tags = f"{label}, importance: {score}" if score else f"{label}"
                body = f"**Suggestion:** {content} [{tags}]\n```suggestion\n" + new_code_snippet + "\n```"

                inline_suggestions.append({'body': body, 'relevant_file': relevant_file,
                                           'relevant_lines_start': relevant_lines_start,
                                           'relevant_lines_end': relevant_lines_end,
                                           'original_suggestion': d})
            except Exception:
                get_logger().info(f"Could not parse suggestion: {d}")

        if self.git_provider.publish_code_suggestions(inline_suggestions):
            return

        get_logger().info("Failed to publish code suggestions, trying to publish each suggestion separately")
        for single_suggestion in inline_suggestions:
            self.git_provider.publish_code_suggestions([single_suggestion])

    def dedent_code(self, relevant_file, relevant_lines_start, new_code_snippet):
        """
        Align the indentation of a suggested snippet with the head-file line it starts at.

        The indentation of line `relevant_lines_start` (1-based) of `relevant_file` in the head version
        is compared with the indentation of the first snippet line. When the original line is indented
        deeper, the missing indentation is added to every snippet line.

        Args:
            relevant_file: file name, compared against the stripped `filename` of each diff file.
            relevant_lines_start: 1-based line number of the first line the suggestion refers to.
            new_code_snippet: the suggested code.

        Returns:
            The re-indented snippet, or the snippet unchanged when the file or line cannot be
            resolved, when no extra indentation is needed, or when an error occurs.
        """
        try:  # dedent code snippet
            self.diff_files = self.git_provider.diff_files if self.git_provider.diff_files \
                else self.git_provider.get_diff_files()
            original_initial_line = None
            for file in self.diff_files:
                if file.filename.strip() == relevant_file:
                    if file.head_file:
                        file_lines = file.head_file.splitlines()
                        # line numbers are 1-based: a value below 1 would otherwise wrap around through
                        # negative indexing and silently pick a line from the end of the file
                        if relevant_lines_start < 1 or relevant_lines_start > len(file_lines):
                            get_logger().warning(
                                "Could not dedent code snippet, because relevant_lines_start is out of range",
                                artifact={'filename': file.filename,
                                          'file_content': file.head_file,
                                          'relevant_lines_start': relevant_lines_start,
                                          'new_code_snippet': new_code_snippet})
                            return new_code_snippet
                        else:
                            original_initial_line = file_lines[relevant_lines_start - 1]
                    else:
                        get_logger().warning("Could not dedent code snippet, because head_file is missing",
                                             artifact={'filename': file.filename,
                                                       'relevant_lines_start': relevant_lines_start,
                                                       'new_code_snippet': new_code_snippet})
                        return new_code_snippet
                    break
            if original_initial_line:
                snippet_lines = new_code_snippet.splitlines()
                if not snippet_lines:
                    # empty snippet: nothing to indent
                    return new_code_snippet
                suggested_initial_line = snippet_lines[0]
                original_initial_spaces = len(original_initial_line) - len(original_initial_line.lstrip()) # lstrip works both for spaces and tabs
                suggested_initial_spaces = len(suggested_initial_line) - len(suggested_initial_line.lstrip())
                delta_spaces = original_initial_spaces - suggested_initial_spaces
                if delta_spaces > 0:
                    # Detect indentation character from original line
                    indent_char = '\t' if original_initial_line.startswith('\t') else ' '
                    new_code_snippet = textwrap.indent(new_code_snippet, delta_spaces * indent_char).rstrip('\n')
        except Exception as e:
            get_logger().error(f"Error when dedenting code snippet for file {relevant_file}, error: {e}")

        return new_code_snippet

    def validate_one_liner_suggestion_not_repeating_code(self, suggestion):
        """
        Zero the score of a suggestion whose change already appears in the head file.

        For the diff file matching `relevant_file`: when the stripped `existing_code` appears in the
        base file but not in the head file, and the stripped `improved_code` appears in the head file,
        the suggestion's score is set to 0. Suggestions whose `existing_code` contains '...' are
        returned without any check.

        Args:
            suggestion: suggestion dict; may be modified in place.

        Returns:
            The same suggestion dict.
        """
        try:
            existing_code = suggestion.get('existing_code', '').strip()
            if '...' in existing_code:
                return suggestion
            new_code = suggestion.get('improved_code', '').strip()
            relevant_file = suggestion.get('relevant_file', '').strip()

            for file in self.git_provider.get_diff_files():
                if file.filename.strip() != relevant_file:
                    continue
                # protections
                if not file.head_file:
                    get_logger().info(f"head_file is empty")
                    return suggestion
                head_file, base_file = file.head_file, file.base_file
                already_in_head = (existing_code in base_file
                                   and existing_code not in head_file
                                   and new_code in head_file)
                if already_in_head:
                    suggestion["score"] = 0
                    get_logger().warning(
                        f"existing_code is in the base file but not in the head file, setting score to 0",
                        artifact={"suggestion": suggestion})
        except Exception as e:
            get_logger().exception(f"Error validating one-liner suggestion", artifact={"error": e})

        return suggestion

    def remove_line_numbers(self, patches_diff_list: List[str]) -> List[str]:
        """
        Return a copy of the given diffs with the leading line numbers removed.

        A line that consists of digits only becomes an empty line. A line that starts with a digit
        loses its leading digits and the single character that follows them. All other lines are kept.

        The result is also stored in `self.patches_diff_list_no_line_numbers`.

        Args:
            patches_diff_list: diff texts to process (this argument, not `self.patches_diff_list`,
                is the input).

        Returns:
            The list of processed diffs, or `patches_diff_list` unchanged if processing fails.
        """
        # create a copy of the patches_diff_list, without line numbers for '__new hunk__' sections
        try:
            self.patches_diff_list_no_line_numbers = []
            for patches_diff in patches_diff_list:
                patches_diff_lines = patches_diff.splitlines()
                for i, line in enumerate(patches_diff_lines):
                    if line.strip():
                        if line.isnumeric():
                            patches_diff_lines[i] = ''
                        elif line[0].isdigit():
                            # drop the leading digits together with the separator character after them
                            for j, char in enumerate(line):
                                if not char.isdigit():
                                    patches_diff_lines[i] = line[j + 1:]
                                    break
                self.patches_diff_list_no_line_numbers.append('\n'.join(patches_diff_lines))
            return self.patches_diff_list_no_line_numbers
        except Exception as e:
            get_logger().error(f"Error removing line numbers from patches_diff_list, error: {e}")
            return patches_diff_list

    async def prepare_prediction_main(self, model: str) -> dict:
        """
        Split the PR diff into chunks, get suggestions for every chunk and merge them.

        Depending on `pr_code_suggestions.decouple_hunks`, the chunks are produced directly in the
        decoupled format with line numbers, or produced without line numbers and then converted.
        If the conversion yields nothing, the decoupled format is used as a fallback.

        Suggestions whose score is below `pr_code_suggestions.suggestions_score_threshold`
        (at least 1) are dropped.

        Args:
            model: model used for the diff preparation and the predictions.

        Returns:
            A dict with the merged `code_suggestions` list, or None when there is no diff.
            The same value is stored in `self.data`.
        """
        # get PR diff
        if get_settings().pr_code_suggestions.decouple_hunks:
            self.patches_diff_list = get_pr_multi_diffs(self.git_provider,
                                                        self.token_handler,
                                                        model,
                                                        max_calls=get_settings().pr_code_suggestions.max_number_of_calls,
                                                        add_line_numbers=True)  # decouple hunk with line numbers
            self.patches_diff_list_no_line_numbers = self.remove_line_numbers(self.patches_diff_list)  # decouple hunk

        else:
            # non-decoupled hunks
            self.patches_diff_list_no_line_numbers = get_pr_multi_diffs(self.git_provider,
                                                                        self.token_handler,
                                                                        model,
                                                                        max_calls=get_settings().pr_code_suggestions.max_number_of_calls,
                                                                        add_line_numbers=False)
            self.patches_diff_list = await self.convert_to_decoupled_with_line_numbers(
                self.patches_diff_list_no_line_numbers, model)
            if not self.patches_diff_list:
                # fallback to decoupled hunks
                self.patches_diff_list = get_pr_multi_diffs(self.git_provider,
                                                            self.token_handler,
                                                            model,
                                                            max_calls=get_settings().pr_code_suggestions.max_number_of_calls,
                                                            add_line_numbers=True)  # decouple hunk with line numbers
                # the two lists are zipped below, so they must describe the same chunks; derive the
                # no-line-numbers list from the regenerated one, exactly as in the decoupled branch
                self.patches_diff_list_no_line_numbers = self.remove_line_numbers(self.patches_diff_list)

        if self.patches_diff_list:
            get_logger().info(f"Number of PR chunk calls: {len(self.patches_diff_list)}")
            get_logger().debug(f"PR diff:", artifact=self.patches_diff_list)

            # parallelize calls to AI:
            if get_settings().pr_code_suggestions.parallel_calls:
                prediction_list = await asyncio.gather(
                    *[self._get_prediction(model, patches_diff, patches_diff_no_line_numbers) for
                      patches_diff, patches_diff_no_line_numbers in
                      zip(self.patches_diff_list, self.patches_diff_list_no_line_numbers)])
            else:
                prediction_list = []
                for patches_diff, patches_diff_no_line_numbers in zip(self.patches_diff_list, self.patches_diff_list_no_line_numbers):
                    prediction = await self._get_prediction(model, patches_diff, patches_diff_no_line_numbers)
                    prediction_list.append(prediction)
            # keep the raw predictions on the instance in both modes
            self.prediction_list = prediction_list

            data = {"code_suggestions": []}
            for j, predictions in enumerate(prediction_list):  # each call adds an element to the list
                if "code_suggestions" in predictions:
                    score_threshold = max(1, int(get_settings().pr_code_suggestions.suggestions_score_threshold))
                    for i, prediction in enumerate(predictions["code_suggestions"]):
                        try:
                            score = int(prediction.get("score", 1))
                            if score >= score_threshold:
                                data["code_suggestions"].append(prediction)
                            else:
                                get_logger().info(
                                    f"Removing suggestions {i} from call {j}, because score is {score}, and score_threshold is {score_threshold}",
                                    artifact=prediction)
                        except Exception as e:
                            get_logger().error(f"Error getting PR diff for suggestion {i} in call {j}, error: {e}",
                                               artifact={"prediction": prediction})
            self.data = data
        else:
            get_logger().warning(f"Empty PR diff list")
            self.data = data = None
        return data

    async def convert_to_decoupled_with_line_numbers(self, patches_diff_list_no_line_numbers, model) -> List[str]:
        """
        Convert diff chunks without line numbers into the decoupled-hunks format with line numbers.

        Every chunk is split on the "## File: " prefix, each file part is converted with
        `decouple_and_convert_to_hunks_with_lines_numbers`, and the parts are joined back. A converted
        chunk that exceeds the model's token limit minus 2000 tokens is clipped.

        Args:
            patches_diff_list_no_line_numbers: diff chunks to convert.
            model: model name, used to look up the token limit.

        Returns:
            The converted chunks, or an empty list if any error occurs.
        """
        with get_logger().contextualize(sub_feature='convert_to_decoupled_with_line_numbers'):
            try:
                file_prefix = "## File: "
                delta_output = 2000
                converted_list = []
                for patch_prompt in patches_diff_list_no_line_numbers:
                    rebuilt_parts = []
                    for idx, file_patch in enumerate(patch_prompt.strip().split(f"\n{file_prefix}")):
                        # everything before the first hunk marker is kept as the prefix of the part
                        before_hunks = file_patch.split("\n@@")[0]
                        if idx == 0:
                            prefix = before_hunks.strip()
                        else:
                            # the split removed the file prefix from every part but the first
                            prefix = (file_prefix + before_hunks[1:]).strip()
                        hunks_text = decouple_and_convert_to_hunks_with_lines_numbers(file_patch,
                                                                                      file=None).strip()
                        rebuilt_parts.append((prefix + '\n\n' + hunks_text).strip())
                    patch_final = "\n\n\n".join(rebuilt_parts)

                    # note - here we take the actual max tokens, without any reductions
                    max_tokens_full = MAX_TOKENS[model] if model in MAX_TOKENS else get_max_tokens(model)
                    token_count = self.token_handler.count_tokens(patch_final)
                    if token_count > max_tokens_full - delta_output:
                        get_logger().warning(
                            f"Token count {token_count} exceeds the limit {max_tokens_full - delta_output}. clipping the tokens")
                        patch_final = clip_tokens(patch_final, max_tokens_full - delta_output)
                    converted_list.append(patch_final)
                return converted_list
            except Exception as e:
                get_logger().exception(f"Error converting to decoupled with line numbers",
                                       artifact={'patches_diff_list_no_line_numbers': patches_diff_list_no_line_numbers})
                return []

    def generate_summarized_suggestions(self, data: Dict) -> str:
        try:
            pr_body = "## PR Code Suggestions ✨\n\n"

            if len(data.get('code_suggestions', [])) == 0:
                pr_body += "No suggestions found to improve this PR."
                return pr_body

            if get_settings().config.is_auto_command:
                pr_body += "Explore these optional code suggestions:\n\n"

            language_extension_map_org = get_settings().language_extension_map_org
            extension_to_language = {}
            for language, extensions in language_extension_map_org.items():
                for ext in extensions:
                    extension_to_language[ext] = language

            pr_body += "<table>"
            header = f"Suggestion"
            delta = 66
            header += "&nbsp; " * delta
            pr_body += f"""<thead><tr><td><strong>Category</strong></td><td align=left><strong>{header}</strong></td><td align=center><strong>Impact</strong></td></tr>"""
            pr_body += """<tbody>"""
            suggestions_labels = dict()
            # add all suggestions related to each label
            for suggestion in data['code_suggestions']:
                label = suggestion['label'].strip().strip("'").strip('"')
                if label not in suggestions_labels:
                    suggestions_labels[label] = []
                suggestions_labels[label].append(suggestion)

            # sort suggestions_labels by the suggestion with the highest score
            suggestions_labels = dict(
                sorted(suggestions_labels.items(), key=lambda x: max([s['score'] for s in x[1]]), reverse=True))
            # sort the suggestions inside each label group by score
            for label, suggestions in suggestions_labels.items():
                suggestions_labels[label] = sorted(suggestions, key=lambda x: x['score'], reverse=True)

            counter_suggestions = 0
            for label, suggestions in suggestions_labels.items():
                num_suggestions = len(suggestions)
                pr_body += f"""<tr><td rowspan={num_suggestions}>{label.capitalize()}</td>\n"""
                for i, suggestion in enumerate(suggestions):

                    relevant_file = suggestion['relevant_file'].strip()
                    relevant_lines_start = int(suggestion['relevant_lines_start'])
                    relevant_lines_end = int(suggestion['relevant_lines_end'])
                    range_str = ""
                    if relevant_lines_start == relevant_lines_end:
                        range_str = f"[{relevant_lines_start}]"
                    else:
                        range_str = f"[{relevant_lines_start}-{relevant_lines_end}]"

                    try:
                        code_snippet_link = self.git_provider.get_line_link(relevant_file, relevant_lines_start,
                                                                            relevant_lines_end)
                    except:
                        code_snippet_link = ""
                    # add html table for each suggestion

                    suggestion_content = suggestion['suggestion_content'].rstrip()
                    CHAR_LIMIT_PER_LINE = 84
                    suggestion_content = insert_br_after_x_chars(suggestion_content, CHAR_LIMIT_PER_LINE)
                    # pr_body += f"<tr><td><details><summary>{suggestion_content}</summary>"
                    existing_code = suggestion['existing_code'].rstrip() + "\n"
                    improved_code = suggestion['improved_code'].rstrip() + "\n"

                    diff = difflib.unified_diff(existing_code.split('\n'),
                                                improved_code.split('\n'), n=999)
                    patch_orig = "\n".join(diff)
                    patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n')

                    example_code = ""
                    example_code += f"```diff\n{patch.rstrip()}\n```\n"
                    if i == 0:
                        pr_body += f"""<td>\n\n"""
                    else:
                        pr_body += f"""<tr><td>\n\n"""
                    suggestion_summary = suggestion['one_sentence_summary'].strip().rstrip('.')
                    if "'<" in suggestion_summary and ">'" in suggestion_summary:
                        # escape the '<' and '>' characters, otherwise they are interpreted as html tags
                        get_logger().info(f"Escaped suggestion summary: {suggestion_summary}")
                        suggestion_summary = suggestion_summary.replace("'<", "`<")
                        suggestion_summary = suggestion_summary.replace(">'", ">`")
                    if '`' in suggestion_summary:
                        suggestion_summary = replace_code_tags(suggestion_summary)

                    pr_body += f"""\n\n<details><summary>{suggestion_summary}</summary>\n\n___\n\n"""
                    pr_body += f"""
**{suggestion_content}**

[{relevant_file} {range_str}]({code_snippet_link})

{example_code.rstrip()}
"""
                    if suggestion.get('score_why'):
                        pr_body += f"<details><summary>Suggestion importance[1-10]: {suggestion['score']}</summary>\n\n"
                        pr_body += f"__\n\nWhy: {suggestion['score_why']}\n\n"
                        pr_body += f"</details>"

                    pr_body += f"</details>"

                    # # add another column for 'score'
                    score_int = int(suggestion.get('score', 0))
                    score_str = f"{score_int}"
                    if get_settings().pr_code_suggestions.new_score_mechanism:
                        score_str = self.get_score_str(score_int)
                    pr_body += f"</td><td align=center>{score_str}\n\n"

                    pr_body += f"</td></tr>"
                    counter_suggestions += 1

                # pr_body += "</details>"
                # pr_body += """</td></tr>"""
            pr_body += """</tr></tbody></table>"""
            return pr_body
        except Exception as e:
            get_logger().info(f"Failed to publish summarized code suggestions, error: {e}")
            return ""

    def get_score_str(self, score: int) -> str:
        """
        Translate a numeric suggestion score into an impact label.

        The thresholds are read from the 'pr_code_suggestions' settings
        ('new_score_mechanism_th_high', default 9, and 'new_score_mechanism_th_medium', default 7).

        Args:
            score (int): The suggestion score.

        Returns:
            str: "High" if the score reaches the high threshold, otherwise "Medium" if it reaches
                the medium threshold, otherwise "Low".
        """
        suggestions_settings = get_settings().pr_code_suggestions
        high_threshold = suggestions_settings.get('new_score_mechanism_th_high', 9)
        medium_threshold = suggestions_settings.get('new_score_mechanism_th_medium', 7)

        # the high threshold is checked first, exactly as before
        if score >= high_threshold:
            return "High"
        return "Medium" if score >= medium_threshold else "Low"

    async def self_reflect_on_suggestions(self,
                                          suggestion_list: List,
                                          patches_diff: str,
                                          model: str,
                                          prev_suggestions_str: str = "",
                                          dedicated_prompt: str = "") -> str:
        """
        Ask the AI model to reflect on a list of code suggestions.

        Args:
            suggestion_list (List): The suggestions to reflect on.
            patches_diff (str): The diff exposed to the prompt templates as 'diff'.
            model (str): The model name passed to the AI handler.
            prev_suggestions_str (str, optional): Exposed to the prompt templates as
                'prev_suggestions_str'. Defaults to "".
            dedicated_prompt (str, optional): Name of a settings entry whose 'system' and 'user'
                templates replace the default 'pr_code_suggestions_reflect_prompt'. Defaults to "".

        Returns:
            str: The raw model response, or an empty string when there are no suggestions or when
                rendering the prompts / calling the model raised an exception.
        """
        if not suggestion_list:
            return ""

        try:
            # numbered, human-readable dump of the suggestions (1-based)
            suggestion_str = "".join(
                f"suggestion {position}: " + str(item) + '\n\n'
                for position, item in enumerate(suggestion_list, start=1)
            )

            render_vars = {
                'suggestion_list': suggestion_list,
                'suggestion_str': suggestion_str,
                "diff": patches_diff,
                'num_code_suggestions': len(suggestion_list),
                'prev_suggestions_str': prev_suggestions_str,
                "is_ai_metadata": get_settings().get("config.enable_ai_metadata", False),
                'duplicate_prompt_examples': get_settings().config.get('duplicate_prompt_examples', False),
            }
            jinja_env = Environment(undefined=StrictUndefined)

            # pick the prompt pair: a dedicated one if requested, otherwise the default reflect prompt
            if dedicated_prompt:
                prompt_section = get_settings().get(dedicated_prompt)
            else:
                prompt_section = get_settings().pr_code_suggestions_reflect_prompt
            system_prompt_reflect = jinja_env.from_string(prompt_section.system).render(render_vars)
            user_prompt_reflect = jinja_env.from_string(prompt_section.user).render(render_vars)

            with get_logger().contextualize(command="self_reflect_on_suggestions"):
                response_reflect, _finish_reason = await self.ai_handler.chat_completion(
                    model=model,
                    system=system_prompt_reflect,
                    temperature=get_settings().config.temperature,
                    user=user_prompt_reflect,
                )
        except Exception as e:
            get_logger().info(f"Could not reflect on suggestions, error: {e}")
            return ""
        return response_reflect

================================================
FILE: pr_agent/tools/pr_config.py
================================================
from dynaconf import Dynaconf

from pr_agent.config_loader import get_settings
from pr_agent.git_providers import get_git_provider
from pr_agent.log import get_logger


class PRConfig:
    """
    The PRConfig class is responsible for listing all configuration options available for the user.
    """
    def __init__(self, pr_url: str, args=None, ai_handler=None):
        """
        Initialize the PRConfig object with the necessary attributes and objects to comment on a pull request.

        Args:
            pr_url (str): The URL of the pull request to comment on.
            args (list, optional): List of arguments passed to the PRConfig class. Not used by this class.
                Defaults to None.
            ai_handler (optional): Accepted but not used by this class. Defaults to None.
        """
        self.git_provider = get_git_provider()(pr_url)

    async def run(self):
        """
        Prepare the configuration listing and, if 'config.publish_output' is set, publish it as a
        PR comment and remove the initial (temporary) comment.

        Returns:
            str: Always an empty string.
        """
        get_logger().info('Getting configuration settings...')
        get_logger().info('Preparing configs...')
        pr_comment = self._prepare_pr_configs()
        if get_settings().config.publish_output:
            get_logger().info('Pushing configs...')
            self.git_provider.publish_comment(pr_comment)
            self.git_provider.remove_initial_comment()
        return ""

    def _prepare_pr_configs(self) -> str:
        """
        Render the current settings as a collapsible markdown block.

        'configuration.toml' is loaded on its own only to learn which section headers it declares.
        The values printed are those of the active settings, restricted to sections whose name
        starts with 'pr_' or 'config' and that appear in that file. Keys that look sensitive
        (exact matches of the skip list, or names containing 'key', 'secret', 'token' or 'private')
        are omitted.

        Returns:
            str: The markdown text of the configuration listing.
        """
        try:
            conf_file = get_settings().find_file("configuration.toml")
            dynconf_kwargs = {
                # DISABLE default loaders, otherwise will load toml files more than once.
                'core_loaders': [],
                # Use a custom loader to merge sections, but overwrite their overlapping values.
                # Do not use ENV variables.
                'loaders': ['pr_agent.custom_merge_loader'],
                # Merge multiple TOML files; prevent full section overwrite—only overlapping keys
                # in sections overwrite prior ones.
                'merge_enabled': True,
            }
            conf_settings = Dynaconf(settings_files=[conf_file],
                                     # Security: Disable all dynamic loading features
                                     load_dotenv=False,  # Don't load .env files
                                     envvar_prefix=False,
                                     **dynconf_kwargs
                                     )
        except Exception as e:
            get_logger().error("Caught exception during Dynaconf loading. Returning empty dict",
                               artifact={"exception": e})
            conf_settings = {}
        configuration_headers = [header.lower() for header in conf_settings.keys()]
        relevant_configs = {
            header: configs for header, configs in get_settings().to_dict().items()
            if (header.lower().startswith("pr_") or header.lower().startswith("config")) and header.lower() in configuration_headers
        }

        skip_keys = ['ai_disclaimer', 'ai_disclaimer_title', 'ANALYTICS_FOLDER', 'secret_provider', "skip_keys", "app_id", "redirect",
                     'trial_prefix_message', 'no_eligible_message', 'identity_provider', 'ALLOWED_REPOS',
                     'APP_NAME', 'PERSONAL_ACCESS_TOKEN', 'shared_secret', 'key', 'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 'user_token',
                     'private_key', 'private_key_id', 'client_id', 'client_secret', 'token', 'bearer_token', 'jira_api_token','webhook_secret']
        partial_skip_keys = ['key', 'secret', 'token', 'private']
        # The original lookup passes a dotted key to the 'config' section itself; keep it first for
        # backward compatibility, then fall back to the top-level dotted lookup so that a
        # user-defined 'config.skip_keys' is actually honored.
        extra_skip_keys = (get_settings().config.get('config.skip_keys', [])
                           or get_settings().get('config.skip_keys', []))
        if extra_skip_keys:
            skip_keys.extend(extra_skip_keys)
        skip_keys_lower = [key.lower() for key in skip_keys]

        markdown_text = "<details> <summary><strong>🛠️ PR-Agent Configurations:</strong></summary> \n\n"
        markdown_text += "\n\n```yaml\n\n"
        for header, configs in relevant_configs.items():
            # nothing to print for empty sections; non-dict values have no key/value pairs to list
            if not configs or not isinstance(configs, dict):
                continue
            markdown_text += "\n\n"
            markdown_text += f"==================== {header} ===================="
            for key, value in configs.items():
                if key.lower() in skip_keys_lower:
                    continue
                if any(skip_key in key.lower() for skip_key in partial_skip_keys):
                    continue
                # strings are shown via repr() so that quotes and escapes stay visible
                markdown_text += f"\n{header.lower()}.{key.lower()} = {repr(value) if isinstance(value, str) else value}"
                markdown_text += "  "
        markdown_text += "\n```"
        markdown_text += "\n</details>\n"
        get_logger().info("Possible Configurations outputted to PR comment", artifact=markdown_text)
        return markdown_text


================================================
FILE: pr_agent/tools/pr_description.py
================================================
import asyncio
import copy
import re
import traceback
from functools import partial
from typing import List, Tuple

import yaml
from jinja2 import Environment, StrictUndefined

from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
from pr_agent.algo.pr_processing import (OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD,
                                         get_pr_diff,
                                         get_pr_diff_multiple_patchs,
                                         retry_with_fallback_models)
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import (ModelType, PRDescriptionHeader, clip_tokens,
                                 get_max_tokens, get_user_labels, load_yaml,
                                 set_custom_labels,
                                 show_relevant_configurations)
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import (GithubProvider, get_git_provider,
                                    get_git_provider_with_context)
from pr_agent.git_providers.git_provider import get_main_pr_language
from pr_agent.log import get_logger
from pr_agent.servers.help import HelpMessage
from pr_agent.tools.ticket_pr_compliance_check import (
    extract_and_cache_pr_tickets, extract_ticket_links_from_pr_description,
    extract_tickets)


class PRDescription:
    def __init__(self, pr_url: str, args: list = None,
                 ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
        """
        Set up everything needed to generate a PR description with an AI model: the git provider,
        the AI handler, the prompt variables and the token handler.

        Args:
            pr_url (str): The URL of the pull request.
            args (list, optional): List of arguments passed to the PRDescription class. Defaults to None.
            ai_handler: Callable that returns the AI handler instance. Defaults to LiteLLMAIHandler.
        """
        # Git provider and the PR's main language
        provider = get_git_provider_with_context(pr_url)
        self.git_provider = provider
        self.main_pr_language = get_main_pr_language(provider.get_languages(), provider.get_files())
        self.pr_id = provider.get_pr_id()
        self.keys_fix = ["filename:", "language:", "changes_summary:", "changes_title:", "description:", "title:"]

        # Semantic file types need gfm_markdown; switch the setting off when the provider lacks it
        if get_settings().pr_description.enable_semantic_files_types:
            if not provider.is_supported("gfm_markdown"):
                get_logger().debug(f"Disabling semantic files types for {self.pr_id}, gfm_markdown not supported.")
                get_settings().pr_description.enable_semantic_files_types = False

        # AI handler
        handler = ai_handler()
        self.ai_handler = handler
        handler.main_pr_language = self.main_pr_language

        # Prompt variables
        self.COLLAPSIBLE_FILE_LIST_THRESHOLD = get_settings().pr_description.get("collapsible_file_list_threshold", 8)
        # the diagram also needs gfm_markdown (original note: github and gitlab support gfm_markdown)
        diagram_enabled = (get_settings().pr_description.get("enable_pr_diagram", False)
                           and provider.is_supported("gfm_markdown"))
        self.vars = {
            "title": provider.pr.title,
            "branch": provider.get_pr_branch(),
            "description": provider.get_pr_description(full=False),
            "language": self.main_pr_language,
            # empty diff for initial calculation
            "diff": "",
            "extra_instructions": get_settings().pr_description.extra_instructions,
            "commit_messages_str": provider.get_commit_messages(),
            "enable_custom_labels": get_settings().config.enable_custom_labels,
            # will be filled if necessary in 'set_custom_labels' function
            "custom_labels_class": "",
            "enable_semantic_files_types": get_settings().pr_description.enable_semantic_files_types,
            "related_tickets": "",
            "include_file_summary_changes": len(provider.get_diff_files()) <= self.COLLAPSIBLE_FILE_LIST_THRESHOLD,
            "duplicate_prompt_examples": get_settings().config.get("duplicate_prompt_examples", False),
            "enable_pr_diagram": diagram_enabled,
        }

        self.user_description = provider.get_user_description()

        # Token handler, built from the description prompts
        self.token_handler = TokenHandler(
            provider.pr,
            self.vars,
            get_settings().pr_description_prompt.system,
            get_settings().pr_description_prompt.user,
        )

        # Filled in later while the tool runs
        self.patches_diff = None
        self.prediction = None
        self.file_label_dict = None

    async def run(self):
        """
        Generate the PR description and, when 'config.publish_output' is set, publish it.

        The method extracts ticket information, asks the model for a prediction (with fallback
        models), builds the title/body, optionally appends help text and the relevant
        configurations, and then publishes labels and the description (either as the PR
        description itself or as a comment).

        Returns:
            None when the prediction is empty, or when 'publish_output' is False (in that case the
            body is stored in ``get_settings().data`` under 'artifact'); an empty string otherwise,
            including when an exception was caught and logged.
        """
        try:
            get_logger().info(f"Generating a PR description for pr_id: {self.pr_id}")
            relevant_configs = {'pr_description': dict(get_settings().pr_description),
                                'config': dict(get_settings().config)}
            get_logger().debug("Relevant configs", artifact=relevant_configs)
            # show a temporary progress comment, but not for automatic commands
            if get_settings().config.publish_output and not get_settings().config.get('is_auto_command', False):
                self.git_provider.publish_comment("Preparing PR description...", is_temporary=True)

            # ticket extraction if exists
            await extract_and_cache_pr_tickets(self.git_provider, self.vars)

            await retry_with_fallback_models(self._prepare_prediction, ModelType.WEAK)

            if self.prediction:
                self._prepare_data()
            else:
                # nothing to publish: clean up the temporary comment and stop
                get_logger().warning(f"Empty prediction, PR: {self.pr_id}")
                self.git_provider.remove_initial_comment()
                return None

            if get_settings().pr_description.enable_semantic_files_types:
                self.file_label_dict = self._prepare_file_labels()

            pr_labels, pr_file_changes = [], []
            if get_settings().pr_description.publish_labels:
                pr_labels = self._prepare_labels()
            else:
                get_logger().debug(f"Publishing labels disabled")

            if get_settings().pr_description.use_description_markers:
                pr_title, pr_body, changes_walkthrough, pr_file_changes = self._prepare_pr_answer_with_markers()
            else:
                pr_title, pr_body, changes_walkthrough, pr_file_changes = self._prepare_pr_answer()
                # append the walkthrough to the body unless it is going to be published inline per file
                if not self.git_provider.is_supported(
                        "publish_file_comments") or not get_settings().pr_description.inline_file_summary:
                    pr_body += "\n\n" + changes_walkthrough + "___\n\n"
            get_logger().debug("PR output", artifact={"title": pr_title, "body": pr_body})

            # Add help text if gfm_markdown is supported
            if self.git_provider.is_supported("gfm_markdown") and get_settings().pr_description.enable_help_text:
                pr_body += "<hr>\n\n<details> <summary><strong>✨ Describe tool usage guide:</strong></summary><hr> \n\n"
                pr_body += HelpMessage.get_describe_usage_guide()
                pr_body += "\n</details>\n"
            elif get_settings().pr_description.enable_help_comment and self.git_provider.is_supported("gfm_markdown"):
                # shorter "Need help?" footer; the markup differs between GitHub and other providers
                if isinstance(self.git_provider, GithubProvider):
                    pr_body += ('\n\n___\n\n> <details> <summary>  Need help?</summary><li>Type <code>/help how to ...</code> '
                                'in the comments thread for any questions about PR-Agent usage.</li><li>Check out the '
                                '<a href="https://qodo-merge-docs.qodo.ai/usage-guide/">documentation</a> '
                                'for more information.</li></details>')
                else: # gitlab
                    pr_body += ("\n\n___\n\n<details><summary>Need help?</summary>- Type <code>/help how to ...</code> in the comments "
                                "thread for any questions about PR-Agent usage.<br>- Check out the "
                                "<a href='https://qodo-merge-docs.qodo.ai/usage-guide/'>documentation</a> for more information.</details>")
            # elif get_settings().pr_description.enable_help_comment:
            #     pr_body += '\n\n___\n\n> 💡 **PR-Agent usage**: Comment `/help "your question"` on any pull request to receive relevant information'

            # Output the relevant configurations if enabled
            if get_settings().get('config', {}).get('output_relevant_configurations', False):
                pr_body += show_relevant_configurations(relevant_section='pr_description')

            if get_settings().config.publish_output:

                # publish labels
                if get_settings().pr_description.publish_labels and pr_labels and self.git_provider.is_supported("get_labels"):
                    original_labels = self.git_provider.get_pr_labels(update=True)
                    get_logger().debug(f"original labels", artifact=original_labels)
                    user_labels = get_user_labels(original_labels)
                    # predicted labels plus the labels returned by get_user_labels for the existing ones
                    new_labels = pr_labels + user_labels
                    get_logger().debug(f"published labels", artifact=new_labels)
                    # only call the provider when the label set actually changes
                    if set(new_labels) != set(original_labels):
                        get_logger().info(f"Setting describe labels:\n{new_labels}")
                        self.git_provider.publish_labels(new_labels)
                    else:
                        get_logger().debug(f"Labels are the same, not updating")

                # publish description
                if get_settings().pr_description.publish_description_as_comment:
                    full_markdown_description = f"## Title\n\n{pr_title.strip()}\n\n___\n{pr_body}"
                    if get_settings().pr_description.publish_description_as_comment_persistent:
                        self.git_provider.publish_persistent_comment(full_markdown_description,
                                                                     initial_header="## Title",
                                                                     update_header=True,
                                                                     name="describe",
                                                                     final_update_message=False, )
                    else:
                        self.git_provider.publish_comment(full_markdown_description)
                else:
                    self.git_provider.publish_description(pr_title.strip(), pr_body)

                    # publish final update message
                    if (get_settings().pr_description.final_update_message and not get_settings().config.get('is_auto_command', False)):
                        latest_commit_url = self.git_provider.get_latest_commit_url()
                        if latest_commit_url:
                            pr_url = self.git_provider.get_pr_url()
                            update_comment = f"**[PR Description]({pr_url})** updated to latest commit ({latest_commit_url})"
                            self.git_provider.publish_comment(update_comment)
                self.git_provider.remove_initial_comment()
            else:
                # not publishing: expose the body through the settings object instead
                get_logger().info('PR description, but not published since publish_output is False.')
                get_settings().data = {"artifact": pr_body}
                return
        except Exception as e:
            # errors are logged, not re-raised; the method then falls through to return ""
            get_logger().error(f"Error generating PR description {self.pr_id}: {e}",
                               artifact={"traceback": traceback.format_exc()})

        return ""

    async def _prepare_prediction(self, model: str) -> None:
        """
        Compute ``self.prediction`` for the PR using the given model.

        Two paths exist:

        * Regular path (large-PR handling disabled, or the diff was returned in one piece): the
          diff is stored in ``self.patches_diff`` and a single prediction is requested with
          'pr_description_prompt'. With semantic file types enabled, the prediction is extended
          with the files the model did not cover.
        * Large-PR path: the diff is split into several patch groups, a files-only prediction is
          requested per group (sequentially or concurrently, per 'async_ai_calls'), the PR headers
          are then predicted from the collected file walkthrough, and both parts are joined into
          one YAML prediction. If the joined YAML does not load but the headers alone do, only the
          headers are kept.

        When description markers are enabled and the user description holds no 'pr_agent:'
        marker, no prediction is made.

        Args:
            model (str): The model name to use for the AI calls and for the token limits.

        Returns:
            None. Results are stored on the instance.
        """
        if get_settings().pr_description.use_description_markers and 'pr_agent:' not in self.user_description:
            get_logger().info("Markers were enabled, but user description does not contain markers. Skipping AI prediction")
            return None

        large_pr_handling = get_settings().pr_description.enable_large_pr_handling and "pr_description_only_files_prompts" in get_settings()
        output = get_pr_diff(self.git_provider, self.token_handler, model, large_pr_handling=large_pr_handling, return_remaining_files=True)
        if isinstance(output, tuple):
            patches_diff, remaining_files_list = output
        else:
            patches_diff = output
            remaining_files_list = []

        if not large_pr_handling or patches_diff:
            self.patches_diff = patches_diff
            if patches_diff:
                # generate the prediction
                get_logger().debug(f"PR diff", artifact=self.patches_diff)
                self.prediction = await self._get_prediction(model, patches_diff, prompt="pr_description_prompt")

                # extend the prediction with additional files not shown
                if get_settings().pr_description.enable_semantic_files_types:
                    self.prediction = await self.extend_uncovered_files(self.prediction)
            else:
                get_logger().error(f"Error getting PR diff {self.pr_id}",
                                   artifact={"traceback": traceback.format_exc()})
                self.prediction = None
        else:
            # get the diff in multiple patches, with the token handler only for the files prompt
            get_logger().debug('large_pr_handling for describe')
            token_handler_only_files_prompt = TokenHandler(
                self.git_provider.pr,
                self.vars,
                get_settings().pr_description_only_files_prompts.system,
                get_settings().pr_description_only_files_prompts.user,
            )
            (patches_compressed_list, total_tokens_list, deleted_files_list, remaining_files_list, file_dict,
             files_in_patches_list) = get_pr_diff_multiple_patchs(
                self.git_provider, token_handler_only_files_prompt, model)

            # get the files prediction for each patch
            if not get_settings().pr_description.async_ai_calls:
                results = []
                for i, patches in enumerate(patches_compressed_list):  # sync calls
                    if not patches:
                        # same rule as the async path: no model call for an empty patch group
                        continue
                    patches_diff = "\n".join(patches)
                    get_logger().debug(f"PR diff number {i + 1} for describe files")
                    prediction_files = await self._get_prediction(model, patches_diff,
                                                                  prompt="pr_description_only_files_prompts")
                    results.append(prediction_files)
            else:  # async calls
                tasks = []
                for i, patches in enumerate(patches_compressed_list):
                    if patches:
                        patches_diff = "\n".join(patches)
                        get_logger().debug(f"PR diff number {i + 1} for describe files")
                        task = asyncio.create_task(
                            self._get_prediction(model, patches_diff, prompt="pr_description_only_files_prompts"))
                        tasks.append(task)
                # Wait for all tasks to complete
                results = await asyncio.gather(*tasks)

            # keep only the per-group answers that are valid YAML starting with 'pr_files'
            file_description_str_list = []
            for i, result in enumerate(results):
                prediction_files = result.strip().removeprefix('```yaml').strip('`').strip()
                if load_yaml(prediction_files, keys_fix_yaml=self.keys_fix) and prediction_files.startswith('pr_files'):
                    prediction_files = prediction_files.removeprefix('pr_files:').strip()
                    file_description_str_list.append(prediction_files)
                else:
                    get_logger().debug(f"failed to generate predictions in iteration {i + 1} for describe files")

            # generate files_walkthrough string, with proper token handling
            token_handler_only_description_prompt = TokenHandler(
                self.git_provider.pr,
                self.vars,
                get_settings().pr_description_only_description_prompts.system,
                get_settings().pr_description_only_description_prompts.user)
            files_walkthrough = "\n".join(file_description_str_list)
            # strings are immutable, so a plain assignment is enough to start the prompt variant
            files_walkthrough_prompt = files_walkthrough
            MAX_EXTRA_FILES_TO_PROMPT = 50

            def _append_file_list(prompt: str, heading: str, files: list, kind: str) -> str:
                # List at most MAX_EXTRA_FILES_TO_PROMPT names under `heading`, then summarize the rest.
                # The limit is checked BEFORE a name is added, so exactly that many names are listed
                # and the "... and N more" count matches the names actually left out.
                prompt += heading
                for idx, file in enumerate(files):
                    if idx >= MAX_EXTRA_FILES_TO_PROMPT:
                        get_logger().debug(f"Too many {kind} files, clipping to {MAX_EXTRA_FILES_TO_PROMPT}")
                        prompt += f"\n... and {len(files) - MAX_EXTRA_FILES_TO_PROMPT} more"
                        break
                    prompt += f"\n- {file}"
                return prompt

            if remaining_files_list:
                files_walkthrough_prompt = _append_file_list(
                    files_walkthrough_prompt,
                    "\n\nNo more token budget. Additional unprocessed files:",
                    remaining_files_list,
                    "remaining")
            if deleted_files_list:
                files_walkthrough_prompt = _append_file_list(
                    files_walkthrough_prompt,
                    "\n\nAdditional deleted files:",
                    deleted_files_list,
                    "deleted")

            tokens_files_walkthrough = len(
                token_handler_only_description_prompt.encoder.encode(files_walkthrough_prompt))
            total_tokens = token_handler_only_description_prompt.prompt_tokens + tokens_files_walkthrough
            max_tokens_model = get_max_tokens(model)
            if total_tokens > max_tokens_model - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD:
                # clip files_walkthrough to get the tokens within the limit
                files_walkthrough_prompt = clip_tokens(files_walkthrough_prompt,
                                                       max_tokens_model - OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD - token_handler_only_description_prompt.prompt_tokens,
                                                       num_input_tokens=tokens_files_walkthrough)

            # PR header inference
            get_logger().debug(f"PR diff only description", artifact=files_walkthrough_prompt)
            prediction_headers = await self._get_prediction(model, patches_diff=files_walkthrough_prompt,
                                                            prompt="pr_description_only_description_prompts")
            prediction_headers = prediction_headers.strip().removeprefix('```yaml').strip('`').strip()

            # extend the tables with the files not shown
            files_walkthrough_extended = await self.extend_uncovered_files(files_walkthrough)

            # final processing
            self.prediction = prediction_headers + "\n" + "pr_files:\n" + files_walkthrough_extended
            if not load_yaml(self.prediction, keys_fix_yaml=self.keys_fix):
                get_logger().error(f"Error getting valid YAML in large PR handling for describe {self.pr_id}")
                # fall back to the headers alone when they are valid YAML on their own
                if load_yaml(prediction_headers, keys_fix_yaml=self.keys_fix):
                    get_logger().debug(f"Using only headers for describe {self.pr_id}")
                    self.prediction = prediction_headers

    async def extend_uncovered_files(self, original_prediction: str) -> str:
        try:
            prediction = original_prediction

            # get the original prediction filenames
            original_prediction_loaded = load_yaml(original_prediction, keys_fix_yaml=self.keys_fix)
            if isinstance(original_prediction_loaded, list):
                original_prediction_dict = {"pr_files": original_prediction_loaded}
            else:
                original_prediction_dict = original_prediction_loaded
            if original_prediction_dict:
                files = original_prediction_dict.get('pr_files', [])
                filenames_predicted = [file.get('filename', '').strip() for file in files if isinstance(file, dict)]
            else:
                filenames_predicted = []

            # extend the prediction with additional files not included in the original prediction
            pr_files = self.git_provider.get_diff_files()
            prediction_extra = "pr_files:"
            MAX_EXTRA_FILES_TO_OUTPUT = 100
            counter_extra_files = 0
            for file in pr_files:
                if file.filename in filenames_predicted:
                    continue

                # add up to MAX_EXTRA_FILES_TO_OUTPUT files
                counter_extra_files += 1
                if counter_extra_files > MAX_EXTRA_FILES_TO_OUTPUT:
                    extra_file_yaml = f"""\
- filename: |
    Additional files not shown
  changes_title: |
    ...
  label: |
    additional files
"""
                    prediction_extra = prediction_extra + "\n" + extra_file_yaml.strip()
                    get_logger().debug(f"Too many remaining files, clipping to {MAX_EXTRA_FILES_TO_OUTPUT}")
                    break

                extra_file_yaml = f"""\
- filename: |
    {file.filename}
  changes_title: |
    ...
  label: |
    additional files
"""
                prediction_extra = prediction_extra + "\n" + extra_file_yaml.strip()

            # merge the two dictionaries
            if counter_extra_files > 0:
                get_logger().info(f"Adding {counter_extra_files} unprocessed extra files to table prediction")
                prediction_extra_dict = load_yaml(prediction_extra, keys_fix_yaml=self.keys_fix)
                if original_prediction_dict and isinstance(original_prediction_dict, dict) and \
                        isinstance(prediction_extra_dict, dict) and "pr_files" in prediction_extra_dict:
                    if "pr_files" in original_prediction_dict:
                        original_prediction_dict["pr_files"].extend(prediction_extra_dict["pr_files"])
                    else:
                        original_prediction_dict["pr_files"] = prediction_extra_dict["pr_files"]
                    new_yaml = yaml.dump(original_prediction_dict)
                    if load_yaml(new_yaml, keys_fix_yaml=self.keys_fix):
                        prediction = new_yaml
                if isinstance(original_prediction, list):
                    prediction = yaml.dump(original_prediction_dict["pr_files"])

            return prediction
        except Exception as e:
            get_logger().exception(f"Error extending uncovered files {self.pr_id}", artifact={"error": e})
            return original_prediction


    async def extend_additional_files(self, remaining_files_list) -> str:
        prediction = self.prediction
        try:
            original_prediction_dict = load_yaml(self.prediction, keys_fix_yaml=self.keys_fix)
            prediction_extra = "pr_files:"
            for file in remaining_files_list:
                extra_file_yaml = f"""\
- filename: |
    {file}
  changes_summary: |
    ...
  changes_title: |
    ...
  label: |
    additional files (token-limit)
"""
                prediction_extra = prediction_extra + "\n" + extra_file_yaml.strip()
            prediction_extra_dict = load_yaml(prediction_extra, keys_fix_yaml=self.keys_fix)
            # merge the two dictionaries
            if isinstance(original_prediction_dict, dict) and isinstance(prediction_extra_dict, dict):
                original_prediction_dict["pr_files"].extend(prediction_extra_dict["pr_files"])
                new_yaml = yaml.dump(original_prediction_dict)
                if load_yaml(new_yaml, keys_fix_yaml=self.keys_fix):
                    prediction = new_yaml
            return prediction
        except Exception as e:
            get_logger().error(f"Error extending additional files {self.pr_id}: {e}")
            return self.prediction

    async def _get_prediction(self, model: str, patches_diff: str, prompt="pr_description_prompt") -> str:
        """
        Render the system/user prompt templates for ``prompt`` and return the model's response text.

        Args:
            model: model name passed to the AI handler.
            patches_diff: text placed in the 'diff' template variable.
            prompt: settings key holding the 'system' and 'user' templates.

        Side effect: stores the rendered variable set on ``self.variables``.
        """
        render_vars = copy.deepcopy(self.vars)
        render_vars["diff"] = patches_diff  # update diff

        jinja_env = Environment(undefined=StrictUndefined)
        set_custom_labels(render_vars, self.git_provider)
        self.variables = render_vars

        def _render(role: str) -> str:
            # a missing section or role falls back to an empty template
            template_text = get_settings().get(prompt, {}).get(role, "")
            return jinja_env.from_string(template_text).render(self.variables)

        system_prompt = _render("system")
        user_prompt = _render("user")

        # the finish reason is not used here
        response, _finish_reason = await self.ai_handler.chat_completion(
            model=model,
            temperature=get_settings().config.temperature,
            system=system_prompt,
            user=user_prompt
        )
        return response

    def _prepare_data(self):
        """
        Parse ``self.prediction`` into ``self.data`` and put its keys in publishing order.

        The optional original user description is added first. Keys are then moved to the end one by one
        (pop + re-insert), so the resulting order is: User Description, title, type, labels, description,
        changes_diagram, pr_files. Keys not listed keep their position ahead of these.
        """
        # Load the AI prediction data into a dictionary
        self.data = load_yaml(self.prediction.strip(), keys_fix_yaml=self.keys_fix)

        if get_settings().pr_description.add_original_user_description and self.user_description:
            self.data["User Description"] = self.user_description

        # re-order keys: re-inserting a popped key moves it to the end of the dict
        for ordered_key in ('User Description', 'title', 'type', 'labels', 'description'):
            if ordered_key in self.data:
                self.data[ordered_key] = self.data.pop(ordered_key)

        if 'changes_diagram' in self.data:
            diagram = self.data.pop('changes_diagram').strip()
            # a diagram that is not a fenced block is dropped
            if diagram.startswith('```'):
                closing_fence = '' if diagram.endswith('```') else '\n```'  # fallback for missing closing
                self.data['changes_diagram'] = '\n' + diagram + closing_fence

        if 'pr_files' in self.data:
            self.data['pr_files'] = self.data.pop('pr_files')

    def _prepare_labels(self) -> List[str]:
        pr_labels = []

        # If the 'PR Type' key is present in the dictionary, split its value by comma and assign it to 'pr_types'
        if 'labels' in self.data and self.data['labels']:
            if type(self.data['labels']) == list:
                pr_labels = self.data['labels']
            elif type(self.data['labels']) == str:
                pr_labels = self.data['labels'].split(',')
        elif 'type' in self.data and self.data['type'] and get_settings().pr_description.publish_labels:
            if type(self.data['type']) == list:
                pr_labels = self.data['type']
            elif type(self.data['type']) == str:
                pr_labels = self.data['type'].split(',')
        pr_labels = [label.strip() for label in pr_labels]

        # convert lowercase labels to original case
        try:
            if "labels_minimal_to_labels_dict" in self.variables:
                d: dict = self.variables["labels_minimal_to_labels_dict"]
                for i, label_i in enumerate(pr_labels):
                    if label_i in d:
                        pr_labels[i] = d[label_i]
        except Exception as e:
            get_logger().error(f"Error converting labels to original case {self.pr_id}: {e}")
        return pr_labels

    def _prepare_pr_answer_with_markers(self) -> Tuple[str, str, str, List[dict]]:
        """
        Fill the `pr_agent:*` markers found in the user's own description with the AI-generated content.

        The plain markers `pr_agent:type`, `pr_agent:summary` and `pr_agent:walkthrough` are replaced only
        when the HTML-comment form of the same marker is not present in the body. `pr_agent:diagram` is
        replaced in both its plain and HTML-comment forms.

        Returns:
        - title: the AI title if `pr_description.generate_ai_title` is on, otherwise the original PR title.
        - body: the user description with markers replaced.
        - walkthrough_gfm: the file-walkthrough table markup ("" when none was produced).
        - pr_file_changes: second value returned by process_pr_files_prediction (empty list by default).
        """
        get_logger().info(f"Using description marker replacements {self.pr_id}")

        # Remove the 'PR Title' key from the dictionary
        ai_title = self.data.pop('title', self.vars["title"])
        if (not get_settings().pr_description.generate_ai_title):
            # Assign the original PR title to the 'title' variable
            title = self.vars["title"]
        else:
            # Assign the value of the 'PR Title' key to 'title' variable
            title = ai_title

        body = self.user_description
        if get_settings().pr_description.include_generated_by_header:
            ai_header = f"### 🤖 Generated by PR Agent at {self.git_provider.last_commit_id.sha}\n\n"
        else:
            ai_header = ""

        ai_type = self.data.get('type')
        if ai_type and not re.search(r'<!--\s*pr_agent:type\s*-->', body):
            if isinstance(ai_type, list):
                pr_type = ', '.join(str(t) for t in ai_type)
            else:
                pr_type = ai_type
            pr_type = f"{ai_header}{pr_type}"
            body = body.replace('pr_agent:type', pr_type)

        ai_summary = self.data.get('description')
        if ai_summary and not re.search(r'<!--\s*pr_agent:summary\s*-->', body):
            summary = f"{ai_header}{ai_summary}"
            body = body.replace('pr_agent:summary', summary)

        ai_walkthrough = self.data.get('pr_files')
        walkthrough_gfm = ""
        pr_file_changes = []
        if ai_walkthrough and not re.search(r'<!--\s*pr_agent:walkthrough\s*-->', body):
            try:
                walkthrough_gfm, pr_file_changes = self.process_pr_files_prediction(walkthrough_gfm,
                                                                                    self.file_label_dict)
                body = body.replace('pr_agent:walkthrough', walkthrough_gfm)
            except Exception as e:
                get_logger().error(f"Failing to process walkthrough {self.pr_id}: {e}")
                body = body.replace('pr_agent:walkthrough', "")

        # Add support for pr_agent:diagram marker (plain and HTML comment formats)
        ai_diagram = self.data.get('changes_diagram')
        if ai_diagram:
            # Use a callable replacement so the diagram is inserted verbatim: a string replacement is parsed
            # as a regex template, where a backslash in the diagram raises "bad escape" or is rewritten.
            body = re.sub(r'<!--\s*pr_agent:diagram\s*-->|pr_agent:diagram', lambda _match: ai_diagram, body)

        return title, body, walkthrough_gfm, pr_file_changes

    def _prepare_pr_answer(self) -> Tuple[str, str, str, List[dict]]:
        """
        Prepare the PR description based on the AI prediction data.

        Note: this removes 'title' (and, depending on settings/provider support, 'labels' and 'type')
        from ``self.data``.

        Returns:
        - title: a string containing the PR title.
        - pr_body: a string containing the PR description body in a markdown format.
        - changes_walkthrough: the collapsible file-walkthrough section ("" when none was produced).
        - pr_file_changes: second value returned by process_pr_files_prediction (empty list by default).
        """

        # Iterate over the dictionary items and append the key and value to 'markdown_text' in a markdown format
        # Don't display 'PR Labels'
        if 'labels' in self.data and self.git_provider.is_supported("get_labels"):
            self.data.pop('labels')
        if not get_settings().pr_description.enable_pr_type:
            # the prediction may not contain 'type' at all; a default avoids a KeyError
            self.data.pop('type', None)

        # Remove the 'PR Title' key from the dictionary
        ai_title = self.data.pop('title', self.vars["title"])
        if (not get_settings().pr_description.generate_ai_title):
            # Assign the original PR title to the 'title' variable
            title = self.vars["title"]
        else:
            # Assign the value of the 'PR Title' key to 'title' variable
            title = ai_title

        # Iterate over the remaining dictionary items and append the key and value to 'pr_body' in a markdown format,
        # except for the items containing the word 'walkthrough'
        pr_body, changes_walkthrough = "", ""
        pr_file_changes = []
        for idx, (key, value) in enumerate(self.data.items()):
            if key == 'changes_diagram':
                pr_body += f"### {PRDescriptionHeader.DIAGRAM_WALKTHROUGH.value}\n\n"
                pr_body += f"{value}\n\n"
                continue
            if key == 'pr_files':
                # the table is built from the grouped labels, not from the raw prediction entries
                value = self.file_label_dict
            else:
                key_publish = key.rstrip(':').replace("_", " ").capitalize()
                if key_publish == "Type":
                    key_publish = "PR Type"
                # elif key_publish == "Description":
                #     key_publish = "PR Description"
                pr_body += f"### **{key_publish}**\n"
            if 'walkthrough' in key.lower():
                if self.git_provider.is_supported("gfm_markdown"):
                    pr_body += "<details> <summary>files:</summary>\n\n"
                for file in value:
                    filename = file['filename'].replace("'", "`")
                    description = file['changes_in_file']
                    pr_body += f'- `{filename}`: {description}\n'
                if self.git_provider.is_supported("gfm_markdown"):
                    pr_body += "</details>\n"
            elif 'pr_files' in key.lower() and get_settings().pr_description.enable_semantic_files_types: # 'File Walkthrough' section
                changes_walkthrough_table, pr_file_changes = self.process_pr_files_prediction(changes_walkthrough, value)
                if get_settings().pr_description.get('file_table_collapsible_open_by_default', False):
                    initial_status = " open"
                else:
                    initial_status = ""
                changes_walkthrough = f"<details{initial_status}> <summary><h3> {PRDescriptionHeader.FILE_WALKTHROUGH.value}</h3></summary>\n\n"
                changes_walkthrough += f"{changes_walkthrough_table}\n\n"
                changes_walkthrough += "</details>\n\n"
            elif key.lower().strip() == 'description':
                if isinstance(value, list):
                    # YAML list items are not guaranteed to be strings
                    value = ', '.join(str(v).rstrip() for v in value)
                value = value.replace('\n-', '\n\n-').strip() # makes the bullet points more readable by adding double space
                pr_body += f"{value}\n"
            else:
                # if the value is a list, join its items by comma
                if isinstance(value, list):
                    value = ', '.join(str(v).rstrip() for v in value)
                pr_body += f"{value}\n"
            if idx < len(self.data) - 1:
                pr_body += "\n\n___\n\n"

        return title, pr_body, changes_walkthrough, pr_file_changes

    def _prepare_file_labels(self):
        file_label_dict = {}
        if (not self.data or not isinstance(self.data, dict) or
                'pr_files' not in self.data or not self.data['pr_files']):
            return file_label_dict
        for file in self.data['pr_files']:
            try:
                required_fields = ['changes_title', 'filename', 'label']
                if not all(field in file for field in required_fields):
                    # can happen for example if a YAML generation was interrupted in the middle (no more tokens)
                    get_logger().warning(f"Missing required fields in file label dict {self.pr_id}, skipping file",
                                         artifact={"file": file})
                    continue
                if not file.get('changes_title'):
                    get_logger().warning(f"Empty changes title or summary in file label dict {self.pr_id}, skipping file",
                                         artifact={"file": file})
                    continue
                filename = file['filename'].replace("'", "`").replace('"', '`')
                changes_summary = file.get('changes_summary', "")
                if not changes_summary and self.vars.get('include_file_summary_changes', True):
                    get_logger().warning(f"Empty changes summary in file label dict, skipping file",
                                         artifact={"file": file})
                    continue
                changes_summary = changes_summary.strip()
                changes_title = file['changes_title'].strip()
                label = file.get('label').strip().lower()
                if label not in file_label_dict:
                    file_label_dict[label] = []
                file_label_dict[label].append((filename, changes_title, changes_summary))
            except Exception as e:
                get_logger().exception(f"Error preparing file label dict {self.pr_id}")
                pass
        return file_label_dict

    def process_pr_files_prediction(self, pr_body, value):
        """
        Append an HTML table of the changed files, grouped by label, to ``pr_body``.

        Args:
            pr_body: text to append the table markup to.
            value: mapping of label -> list of (filename, changes_title, changes_summary) tuples
                (presumably the output of `_prepare_file_labels`; verify against callers).

        Returns:
            (pr_body, pr_comments): the extended text and a list that this method never fills.
            When the provider does not support "gfm_markdown", ``pr_body`` is returned untouched.
            If an error occurs while building the table it is logged and whatever markup was already
            appended is returned.
        """
        pr_comments = []
        # logic for using collapsible file list
        use_collapsible_file_list = get_settings().pr_description.collapsible_file_list
        num_files = 0
        if value:
            for semantic_label in value.keys():
                num_files += len(value[semantic_label])
        if use_collapsible_file_list == "adaptive":
            use_collapsible_file_list = num_files > self.COLLAPSIBLE_FILE_LIST_THRESHOLD

        if not self.git_provider.is_supported("gfm_markdown"):
            return pr_body, pr_comments
        try:
            pr_body += "<table>"
            header = "Relevant files"
            delta = 75
            # header += "&nbsp; " * delta
            pr_body += f"""<thead><tr><th></th><th align="left">{header}</th></tr></thead>"""
            pr_body += """<tbody>"""
            # Normalized filename -> diff file. Built on first use, once per call, instead of fetching and
            # scanning the diff files again for every row.
            diff_files_by_name = None
            for semantic_label in value.keys():
                s_label = semantic_label.strip("'").strip('"')
                pr_body += f"""<tr><td><strong>{s_label.capitalize()}</strong></td>"""
                list_tuples = value[semantic_label]

                if use_collapsible_file_list:
                    pr_body += f"""<td><details><summary>{len(list_tuples)} files</summary><table>"""
                else:
                    pr_body += f"""<td><table>"""
                for filename, file_changes_title, file_change_description in list_tuples:
                    filename = filename.replace("'", "`").rstrip()
                    filename_publish = filename.split("/")[-1]
                    if file_changes_title and file_changes_title.strip() != "...":
                        file_changes_title_code = f"<code>{file_changes_title}</code>"
                        file_changes_title_code_br = insert_br_after_x_chars(file_changes_title_code, x=(delta - 5)).strip()
                        if len(file_changes_title_code_br) < (delta - 5):
                            # pad short titles with non-breaking spaces
                            file_changes_title_code_br += "&nbsp; " * ((delta - 5) - len(file_changes_title_code_br))
                        filename_publish = f"<strong>{filename_publish}</strong><dd>{file_changes_title_code_br}</dd>"
                    else:
                        filename_publish = f"<strong>{filename_publish}</strong>"
                    diff_plus_minus = ""
                    delta_nbsp = ""
                    if diff_files_by_name is None:
                        diff_files_by_name = {}
                        for f in (self.git_provider.get_diff_files() or []):
                            if f.filename:
                                # setdefault keeps the first file for a given name, like the first-match scan did
                                diff_files_by_name.setdefault(f.filename.lower().strip('/'), f)
                    matched_file = diff_files_by_name.get(filename.lower().strip('/'))
                    if matched_file is not None:
                        num_plus_lines = matched_file.num_plus_lines
                        num_minus_lines = matched_file.num_minus_lines
                        diff_plus_minus += f"+{num_plus_lines}/-{num_minus_lines}"
                        # very long or all-zero stats are shown as a plain "[link]"
                        if len(diff_plus_minus) > 12 or diff_plus_minus == "+0/-0":
                            diff_plus_minus = "[link]"
                        delta_nbsp = "&nbsp; " * max(0, (8 - len(diff_plus_minus)))

                    # try to add line numbers link to code suggestions
                    link = ""
                    if hasattr(self.git_provider, 'get_line_link'):
                        filename = filename.strip()
                        link = self.git_provider.get_line_link(filename, relevant_line_start=-1)
                    if (not link or not diff_plus_minus) and ('additional files' not in filename.lower()):
                        # get_logger().warning(f"Error getting line link for '{filename}'")
                        link = ""
                        # continue

                    # Add file data to the PR body
                    file_change_description_br = insert_br_after_x_chars(file_change_description, x=(delta - 5))
                    pr_body = self.add_file_data(delta_nbsp, diff_plus_minus, file_change_description_br, filename,
                                                 filename_publish, link, pr_body)

                # Close the collapsible file list
                if use_collapsible_file_list:
                    pr_body += """</table></details></td></tr>"""
                else:
                    pr_body += """</table></td></tr>"""
            # NOTE(review): every label row is already closed above, so the leading </tr> here has no
            # matching open row; kept as-is to leave the published markup unchanged.
            pr_body += """</tr></tbody></table>"""

        except Exception as e:
            get_logger().error(f"Error processing pr files to markdown {self.pr_id}: {str(e)}")
        return pr_body, pr_comments

    def add_file_data(self, delta_nbsp, diff_plus_minus, file_change_description_br, filename, filename_publish, link,
                      pr_body) -> str:
        """
        Append one HTML table row for a single file to ``pr_body`` and return the new text.

        With a non-empty ``file_change_description_br`` the first cell is a collapsible <details> block
        holding the filename and the description; otherwise it is just ``filename_publish``. The second
        cell always holds the diff-stat link followed by ``delta_nbsp`` padding.
        """
        # the diff-stat cell is identical in both row layouts
        stats_cell = f'  <td><a href="{link}">{diff_plus_minus}</a>{delta_nbsp}</td>'

        if file_change_description_br:
            row_lines = [
                "",
                "<tr>",
                "  <td>",
                "    <details>",
                f"      <summary>{filename_publish}</summary>",
                "<hr>",
                "",
                f"{filename}",
                "",
                f"{file_change_description_br}",
                "",
                "",
                "</details>",
                "",
                "",
                "  </td>",
                stats_cell,
                "",
                "</tr>",
                "",
            ]
        else:
            row_lines = [
                "",
                "<tr>",
                f"  <td>{filename_publish}</td>",
                stats_cell,
                "",
                "</tr>",
                "",
            ]
        # leading and trailing "" entries give the row its surrounding newlines
        return pr_body + "\n".join(row_lines)

def count_chars_without_html(string):
    """Return the length of ``string`` after removing every ``<...>`` tag from it."""
    # without a '<' there can be no tag, so the regex pass is skipped
    visible_text = string if '<' not in string else re.sub('<[^>]+>', '', string)
    return len(visible_text)


def insert_br_after_x_chars(text: str, x=70):
    """
    Insert <br> into a string after a word that increases its length above x characters.
    Use proper HTML tags for code and new lines.

    Args:
        text: the text to wrap; backticks, newlines and (for list text) bullet markers are converted to HTML.
        x: the line-length budget, counted without HTML tags.

    Returns:
        "" for empty input; the text unchanged when its tag-free length is below x; otherwise the
        converted text with <br> breaks inserted, wrapped in <ul>...</ul> when it was detected as a list.
    """

    if not text:
        return ""
    # short text is returned as-is: no backtick/newline conversion happens on this path
    if count_chars_without_html(text) < x:
        return text

    # list detection looks only at the first non-whitespace characters
    is_list = text.lstrip().startswith(("- ", "* "))

    # replace odd instances of ` with <code> and even instances of ` with </code>
    text = replace_code_tags(text)

    # convert list items to <li> only if the text is identified as a list
    if is_list:
        # To handle lists that start with indentation
        leading_whitespace = text[:len(text) - len(text.lstrip())]
        body = text.lstrip()
        # drop the two-character bullet marker of the first item
        body = "<li>" + body[2:]
        text = leading_whitespace + body

        # remaining items: bullets at column 0 or indented by one space
        text = text.replace("\n- ", '<br><li> ').replace("\n - ", '<br><li> ')
        text = text.replace("\n* ", '<br><li> ').replace("\n * ", '<br><li> ')

    # convert new lines to <br>
    text = text.replace("\n", '<br>')

    # split text into lines
    lines = text.split('<br>')
    words = []
    for i, line in enumerate(lines):
        words += line.split(' ')
        # re-attach the break that split() removed to the last word of every line but the final one
        if i < len(lines) - 1:
            words[-1] += "<br>"

    new_text = []
    is_inside_code = False
    current_length = 0
    for word in words:
        # words that are exactly one of these tags never trigger a break and do not count as length
        is_saved_word = False
        if word == "<code>" or word == "</code>" or word == "<li>" or word == "<br>":
            is_saved_word = True

        len_word = count_chars_without_html(word)
        if not is_saved_word and (current_length + len_word > x):
            if is_inside_code:
                # close and reopen the code span so the break sits outside of it
                new_text.append("</code><br><code>")
            else:
                new_text.append("<br>")
            current_length = 0  # Reset counter
        new_text.append(word + " ")

        if not is_saved_word:
            current_length += len_word + 1  # Add 1 for the space

        # NOTE(review): a break glued to a word (e.g. "foo<br>") does not match here, so the counter is
        # only reset when the tag stands alone - confirm whether that is intended.
        if word == "<li>" or word == "<br>":
            current_length = 0

        # a word holding both tags (e.g. "<code>x</code>") ends up outside of code
        if "<code>" in word:
            is_inside_code = True
        if "</code>" in word:
            is_inside_code = False

    processed_text = ''.join(new_text).strip()

    if is_list:
        processed_text = f"<ul>{processed_text}</ul>"

    return processed_text


def replace_code_tags(text):
    """
    Turn backtick-delimited spans into <code>...</code>.

    The text is split on backticks; every odd-indexed piece (the text following the 1st, 3rd, ...
    backtick) is wrapped in <code></code>, so an unmatched final backtick still gets a closing tag.
    """
    return ''.join(
        f'<code>{piece}</code>' if index % 2 else piece
        for index, piece in enumerate(text.split('`'))
    )


================================================
FILE: pr_agent/tools/pr_generate_labels.py
================================================
import copy
import re
from functools import partial
from typing import List, Tuple

from jinja2 import Environment, StrictUndefined

from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import get_user_labels, load_yaml, set_custom_labels
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import get_git_provider
from pr_agent.git_providers.git_provider import get_main_pr_language
from pr_agent.log import get_logger


class PRGenerateLabels:
    """Tool that asks an AI model for labels describing a pull request and publishes them."""

    def __init__(self, pr_url: str, args: list = None,
                 ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
        """
        Initialize the PRGenerateLabels object with the necessary attributes and objects for generating labels
        corresponding to the PR using an AI model.
        Args:
            pr_url (str): The URL of the pull request.
            args (list, optional): List of arguments passed to the PRGenerateLabels class. Defaults to None.
            ai_handler: Callable that returns the AI handler instance to use. Defaults to LiteLLMAIHandler.
        """
        # Initialize the git provider and main PR language
        self.git_provider = get_git_provider()(pr_url)
        self.main_pr_language = get_main_pr_language(
            self.git_provider.get_languages(), self.git_provider.get_files()
        )
        self.pr_id = self.git_provider.get_pr_id()

        # Initialize the AI handler
        self.ai_handler = ai_handler()
        self.ai_handler.main_pr_language = self.main_pr_language

        # Initialize the variables dictionary
        self.vars = {
            "title": self.git_provider.pr.title,
            "branch": self.git_provider.get_pr_branch(),
            "description": self.git_provider.get_pr_description(full=False),
            "language": self.main_pr_language,
            "diff": "",  # empty diff for initial calculation
            "extra_instructions": get_settings().pr_description.extra_instructions,
            "commit_messages_str": self.git_provider.get_commit_messages(),
            "enable_custom_labels": get_settings().config.enable_custom_labels,
            "custom_labels_class": "",  # will be filled if necessary in 'set_custom_labels' function
        }

        # Initialize the token handler
        self.token_handler = TokenHandler(
            self.git_provider.pr,
            self.vars,
            get_settings().pr_custom_labels_prompt.system,
            get_settings().pr_custom_labels_prompt.user,
        )

        # Initialize patches_diff and prediction attributes
        self.patches_diff = None
        self.prediction = None

    async def run(self):
        """
        Generates a PR labels using an AI model and publishes it to the PR.

        Returns:
            None when the model returned an empty prediction, "" otherwise (including when an error
            was caught and logged).
        """

        try:
            get_logger().info(f"Generating a PR labels {self.pr_id}")
            if get_settings().config.publish_output:
                self.git_provider.publish_comment("Preparing PR labels...", is_temporary=True)

            await retry_with_fallback_models(self._prepare_prediction)

            get_logger().info(f"Preparing answer {self.pr_id}")
            if self.prediction:
                self._prepare_data()
            else:
                get_logger().warning(f"Empty prediction, PR: {self.pr_id}")
                # do not leave the temporary "Preparing PR labels..." comment behind
                if get_settings().config.publish_output:
                    self.git_provider.remove_initial_comment()
                return None

            pr_labels = self._prepare_labels()

            if get_settings().config.publish_output:
                get_logger().info(f"Pushing labels {self.pr_id}")

                # labels identified as user labels by get_user_labels are published together with the new ones
                current_labels = self.git_provider.get_pr_labels()
                user_labels = get_user_labels(current_labels)
                pr_labels = pr_labels + user_labels

                if self.git_provider.is_supported("get_labels"):
                    self.git_provider.publish_labels(pr_labels)
                elif pr_labels:
                    # providers without label support get the labels as a comment instead
                    value = ', '.join(v for v in pr_labels)
                    pr_labels_text = f"## PR Labels:\n{value}\n"
                    self.git_provider.publish_comment(pr_labels_text, is_temporary=False)
                self.git_provider.remove_initial_comment()
        except Exception as e:
            get_logger().error(f"Error generating PR labels {self.pr_id}: {e}")

        return ""

    async def _prepare_prediction(self, model: str) -> None:
        """
        Prepare the AI prediction for the PR labels based on the provided model.

        Args:
            model (str): The name of the model to be used for generating the prediction.

        Returns:
            None

        Raises:
            Any exceptions raised by the 'get_pr_diff' and '_get_prediction' functions.

        """

        get_logger().info(f"Getting PR diff {self.pr_id}")
        self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)
        get_logger().info(f"Getting AI prediction {self.pr_id}")
        self.prediction = await self._get_prediction(model)

    async def _get_prediction(self, model: str) -> str:
        """
        Generate an AI prediction for the PR labels based on the provided model.

        Args:
            model (str): The name of the model to be used for generating the prediction.

        Returns:
            str: The generated AI prediction.
        """
        variables = copy.deepcopy(self.vars)
        variables["diff"] = self.patches_diff  # update diff

        environment = Environment(undefined=StrictUndefined)
        set_custom_labels(variables, self.git_provider)
        self.variables = variables

        system_prompt = environment.from_string(get_settings().pr_custom_labels_prompt.system).render(self.variables)
        user_prompt = environment.from_string(get_settings().pr_custom_labels_prompt.user).render(self.variables)

        # the finish reason is not used here
        response, _finish_reason = await self.ai_handler.chat_completion(
            model=model,
            temperature=get_settings().config.temperature,
            system=system_prompt,
            user=user_prompt
        )

        return response

    def _prepare_data(self):
        """Parse the YAML text in ``self.prediction`` and store the result on ``self.data``."""
        # Load the AI prediction data into a dictionary
        self.data = load_yaml(self.prediction.strip())

    def _prepare_labels(self) -> List[str]:
        """
        Build the list of labels from the 'labels' value of ``self.data``.

        The value may be a list or a comma-separated string. Items are stripped, empty items are dropped,
        and non-string items are converted with str().

        Returns:
            The labels, mapped back to their original case through
            ``self.variables["labels_minimal_to_labels_dict"]`` when that mapping exists.
        """
        raw_labels = []

        # If the 'labels' key is present in the dictionary, take its value (list or comma-separated string)
        if 'labels' in self.data:
            raw_labels = self.data['labels']
        if isinstance(raw_labels, str):
            raw_labels = raw_labels.split(',')
        elif not isinstance(raw_labels, list):
            # any other YAML type (None, mapping, number, ...) carries no usable labels
            raw_labels = []

        # a trailing or doubled comma, or an empty YAML item, must not become an empty label
        pr_types = [str(label).strip() for label in raw_labels if label is not None]
        pr_types = [label for label in pr_types if label]

        # convert lowercase labels to original case
        try:
            if "labels_minimal_to_labels_dict" in self.variables:
                d: dict = self.variables["labels_minimal_to_labels_dict"]
                for i, label_i in enumerate(pr_types):
                    if label_i in d:
                        pr_types[i] = d[label_i]
        except Exception as e:
            get_logger().error(f"Error converting labels to original case {self.pr_id}: {e}")

        return pr_types


================================================
FILE: pr_agent/tools/pr_help_docs.py
================================================
import copy
from functools import partial

from jinja2 import Environment, StrictUndefined
import math
import os
import re
from tempfile import TemporaryDirectory

from pr_agent.algo import MAX_TOKENS
from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
from pr_agent.algo.pr_processing import retry_with_fallback_models
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import clip_tokens, get_max_tokens, load_yaml, ModelType
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import get_git_provider_with_context
from pr_agent.log import get_logger
from pr_agent.servers.help import HelpMessage


#Common code that can be called from similar tools:
def modify_answer_section(ai_response: str) -> str | None:
    # Gets the model's answer and relevant sources section, replacing the heading of the answer section with:
    # :bulb: Auto-generated documentation-based answer:
    """
    For example: The following input:

    ### Question: \nThe following general issue was asked by a user: Title: How does one request to re-review a PR? More Info: I cannot seem to find to do this.
    ### Answer:\nAccording to the documentation, one needs to invoke the command: /review
    #### Relevant Sources...

    Should become:

    ### :bulb: Auto-generated documentation-based answer:\n
    According to the documentation, one needs to invoke the command: /review
    #### Relevant Sources...
    """
    model_answer_and_relevant_sections_in_response \
        = extract_model_answer_and_relevant_sources(ai_response)
    if model_answer_and_relevant_sections_in_response is not None:
        cleaned_question_with_answer = "### :bulb: Auto-generated documentation-based answer:\n"
        cleaned_question_with_answer += model_answer_and_relevant_sections_in_response
        return cleaned_question_with_answer
    get_logger().warning(f"Either no answer section found, or that section is malformed: {ai_response}")
    return None

def extract_model_answer_and_relevant_sources(ai_response: str) -> str | None:
    # It is assumed that the input contains several sections with leading "### ",
    # where the answer is the last one of them having the format: "### Answer:\n"), since the model returns the answer
    # AFTER the user question. By splitting using the string: "### Answer:\n" and grabbing the last part,
    # the model answer is guaranteed to be in that last part, provided it is followed by a "#### Relevant Sources:\n\n".
    # (for more details, see here: https://github.com/Codium-ai/pr-agent-pro/blob/main/pr_agent/tools/pr_help_message.py#L173)
    """
    For example:
    ### Question: \nHow does one request to re-review a PR?\n\n
    ### Answer:\nAccording to the documentation, one needs to invoke the command: /review\n\n
    #### Relevant Sources:\n\n...

    The answer part is: "According to the documentation, one needs to invoke the command: /review\n\n"
    followed by "Relevant Sources:\n\n".
    """
    if "### Answer:\n" in ai_response:
        model_answer_and_relevant_sources_sections_in_response = ai_response.split("### Answer:\n")[-1]
        # Split such part by "Relevant Sources" section to contain only the model answer:
        if "#### Relevant Sources:\n\n" in model_answer_and_relevant_sources_sections_in_response:
            model_answer_section_in_response \
                = model_answer_and_relevant_sources_sections_in_response.split("#### Relevant Sources:\n\n")[0]
            get_logger().info(f"Found model answer: {model_answer_section_in_response}")
            return model_answer_and_relevant_sources_sections_in_response \
                if len(model_answer_section_in_response) > 0 else None
    get_logger().warning(f"Either no answer section found, or that section is malformed: {ai_response}")
    return None

def get_maximal_text_input_length_for_token_count_estimation():
    """
    Return the maximal number of characters that may be submitted for token count estimation
    with the configured model (``config.model`` setting).

    Returns:
        900000 when the model name contains 'claude-3-7-sonnet' (case-insensitive), otherwise math.inf.
    """
    configured_model = get_settings().config.model
    is_claude_3_7_sonnet = 'claude-3-7-sonnet' in configured_model.lower()
    # Claude API for token estimation allows maximal text input of 900K chars;
    # otherwise, no known limitation on input text just for token estimation.
    return 900000 if is_claude_3_7_sonnet else math.inf

def return_document_headings(text: str, ext: str) -> str:
    """
    Extract the headings of a documentation file.

    Args:
        text: Contents of the documentation file.
        ext: File extension including the leading dot. Supported: '.md', '.mdx' and '.rst'.

    Returns:
        The distinct headings, one per line, in the order of their first appearance in the document.
        An empty string when the text has no alphabetic characters, the extension is unsupported,
        or an unexpected error occurs.
    """
    try:
        lines = text.split('\n')

        if not text or not re.search(r'[a-zA-Z]', text):
            get_logger().error(f"Empty or non text content found in text: {text}.")
            return ""

        if ext in ['.md', '.mdx']:
            # Extract Markdown headings (lines starting with #)
            headings = [line.strip() for line in lines if line.strip().startswith('#')]
        elif ext == '.rst':
            # Allowed section marker characters, according to the list from:
            # https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#sections
            section_chars = set('!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~')

            headings = []
            for idx, line in enumerate(lines):
                marker = line.rstrip()
                # A section marker line (underline/overline) consists of one repeated marker character.
                if not (marker and marker[0] in section_chars and all(c == marker[0] for c in marker)):
                    continue
                if idx == 0:
                    continue
                # Treat the marker as an underline: the heading is the non-empty line right above it,
                # and it must not be longer than the marker line.
                title = lines[idx - 1].rstrip()
                if title and len(title) <= len(marker):
                    headings.append(title)
        else:
            get_logger().error(f"Unsupported file extension: {ext}")
            return ""

        # dict.fromkeys() removes duplicates while keeping document order. A set would make the
        # order of the returned headings arbitrary and different from run to run.
        return '\n'.join(dict.fromkeys(headings))
    except Exception:
        get_logger().exception(f"Unexpected exception thrown. Returning empty result.")
        return ""

# Load documentation files to memory: full file path (as will be given as prompt) -> doc contents
def map_documentation_files_to_contents(base_path: str, doc_files: list[str], max_allowed_file_len=5000) -> dict[str, str]:
    """
    Read documentation files into memory.

    Args:
        base_path: Leading path to remove from each file path when building the returned keys.
        doc_files: Paths of the files to read (as UTF-8 text).
        max_allowed_file_len: Maximal number of characters kept per file; longer contents are trimmed.

    Returns:
        Mapping of file path (with the leading base_path removed) -> whitespace-stripped file contents.
        Files without any alphabetic character and files that cannot be read are skipped.
        An empty dict is returned on an unexpected error.
    """
    try:
        base_prefix = str(base_path)
        returned_dict = {}
        for file in doc_files:
            try:
                with open(file, 'r', encoding='utf-8') as f:
                    content = f.read()
                # Skip files with no text content
                if not re.search(r'[a-zA-Z]', content):
                    continue
                if len(content) > max_allowed_file_len:
                    get_logger().warning(f"File {file} length: {len(content)} exceeds limit: {max_allowed_file_len}, so it will be trimmed.")
                    content = content[:max_allowed_file_len]
                # Remove only the leading base path. str.replace() would also remove any later occurrence
                # of the same text inside the path, yielding a key that no longer matches the file.
                file_path = str(file).removeprefix(base_prefix)
                returned_dict[file_path] = content.strip()
            except Exception as e:
                get_logger().warning(f"Error while reading the file {file}: {e}")
                continue
        if not returned_dict:
            get_logger().error("Couldn't find any usable documentation files. Returning empty dict.")
        return returned_dict
    except Exception:
        get_logger().exception(f"Unexpected exception thrown. Returning empty dict.")
        return {}

# Goes over the files' contents and generates the payload for the prompt, decorating each file with a header that
# marks where it begins, so as to help the LLM give a better answer.
def aggregate_documentation_files_for_prompt_contents(file_path_to_contents: dict[str, str], return_just_headings=False) -> str:
    """
    Concatenate documentation files into a single prompt text, marking where each file begins.

    Args:
        file_path_to_contents: Mapping of file path -> file contents.
        return_just_headings: When True, each file contributes its name, its index in the mapping and its
            headings (if any) instead of its full contents.

    Returns:
        The aggregated text. Files with blank contents are skipped. An empty string is returned on an
        unexpected error.
    """
    try:
        prompt_chunks = []
        for idx, (file_path, raw_contents) in enumerate(file_path_to_contents.items()):
            file_contents = raw_contents.strip()
            if not file_contents:
                get_logger().error(f"Got empty file contents for: {file_path}. Skipping this file.")
                continue

            if not return_just_headings:
                prompt_chunks.append(
                    f"\n==file name==\n\n{file_path}\n\n==file content==\n\n{file_contents}\n=========\n\n")
                continue

            # Headings mode: the index is the file's position in the mapping (skipped files are counted too).
            file_ext = os.path.splitext(file_path)[-1]
            file_headings = return_document_headings(file_contents, file_ext).strip()
            if file_headings:
                prompt_chunks.append(
                    f"\n==file name==\n\n{file_path}\n\n==index==\n\n{idx}\n\n==file headings==\n\n{file_headings}\n=========\n\n")
            else:
                get_logger().warning(f"No headers for: {file_path}. Will only use filename")
                prompt_chunks.append(f"\n==file name==\n\n{file_path}\n\n==index==\n\n{idx}\n\n")
        return "".join(prompt_chunks)
    except Exception as e:
        get_logger().exception(f"Unexpected exception thrown. Returning empty result.")
        return ""

def format_markdown_q_and_a_response(question_str: str, response_str: str, relevant_sections: list[dict[str, str]],
                                     supported_suffixes: list[str], base_url_prefix: str, base_url_suffix: str="") -> str:
    """
    Format a question, its answer and the relevant documentation sources as markdown.

    Args:
        question_str: The user question.
        response_str: The model answer (surrounding whitespace is stripped).
        relevant_sections: Entries with a 'file_name' and, optionally, a 'relevant_section_header_string'.
        supported_suffixes: File suffixes for which a source link may be emitted.
        base_url_prefix: URL prefix placed before each file name (surrounding '/' are stripped).
        base_url_suffix: Text placed right after each file name (before the optional '#anchor').

    Returns:
        Markdown text with '### Question', '### Answer' and '#### Relevant Sources' sections.
        Sections without a file name or with an unsupported suffix are skipped.
        An empty string is returned on an unexpected error.
    """
    try:
        base_url_prefix = base_url_prefix.strip('/') #Sanitize base_url_prefix
        answer_str = f"### Question: \n{question_str}\n\n"
        answer_str += f"### Answer:\n{response_str.strip()}\n\n"
        answer_str += "#### Relevant Sources:\n\n"
        for section in relevant_sections:
            # A single malformed entry should not discard the whole answer, so skip it instead of raising.
            if not isinstance(section, dict) or not section.get('file_name'):
                get_logger().warning(f"Skipping relevant section without a file name: {section}")
                continue
            # Strip whitespace first, then any leading '/', since some models add it anyway.
            file = str(section['file_name']).strip().lstrip('/')
            if not any(file.endswith(suffix) for suffix in supported_suffixes):
                get_logger().warning(f"Unsupported file extension: {file}")
                continue
            # A missing or null header means "link to the file itself" rather than to the text 'None'.
            raw_header = section.get('relevant_section_header_string')
            header = str(raw_header).strip() if raw_header is not None else ""
            if header:
                markdown_header = format_markdown_header(header)
                # NOTE(review): anchored links are only emitted when a base URL prefix exists, whereas the
                # header-less branch below always emits a link - kept as is; confirm this is intended.
                if base_url_prefix:
                    answer_str += f"> - {base_url_prefix}/{file}{base_url_suffix}#{markdown_header}\n"
            else:
                answer_str += f"> - {base_url_prefix}/{file}{base_url_suffix}\n"
        return answer_str
    except Exception:
        get_logger().exception(f"Unexpected exception thrown. Returning empty result.")
        return ""

def format_markdown_header(header: str) -> str:
    """
    Turn a markdown heading into the lowercase, dash-separated form used as a URL anchor.

    Leading/trailing '#', spaces, '💎' and newlines are stripped; the characters ' ` ( ) , . ? ! are removed;
    spaces become '-'; the result is lowercased.

    Returns:
        The formatted header, or an empty string if formatting fails (the failure is logged).
    """
    try:
        trimmed = header.strip('# 💎\n')
        # One translation table: map ' ' to '-' and delete the listed punctuation characters.
        char_table = str.maketrans(' ', '-', "'`(),.?!")
        return trimmed.translate(char_table).lower()
    except Exception:
        get_logger().exception(f"Error while formatting markdown header", artifacts={'header': header})
        return ""

def clean_markdown_content(content: str) -> str:
    """
    Reduce the size of markdown content by removing hidden comments and other unnecessary elements.

    Args:
        content: The original markdown content

    Returns:
        The cleaned, whitespace-stripped markdown content, or an empty string on an unexpected error.
    """
    # (pattern, replacement, regex flags) - applied strictly in this order.
    cleanup_steps = (
        # HTML comments
        (r'<!--.*?-->', '', re.DOTALL),
        # Frontmatter at the very beginning of the text (between --- or +++ delimiters)
        (r'^---\s*\n.*?\n---\s*\n', '', re.DOTALL),
        (r'^\+\+\+\s*\n.*?\n\+\+\+\s*\n', '', re.DOTALL),
        # Runs of three or more newlines collapse to a single blank line
        (r'\n{3,}', '\n\n', 0),
        # div/span tags, which are often used for styling only (their inner text is kept)
        (r'<div.*?>|</div>|<span.*?>|</span>', '', re.DOTALL),
        # Image alt text, which can be verbose
        (r'!\[(.*?)\]', '![]', 0),
        # Images altogether
        (r'!\[.*?\]\(.*?\)', '', 0),
        # Simple paired HTML tags (other than table related ones): keep only the content between them
        (r'<(?!table|tr|td|th|thead|tbody)([a-zA-Z][a-zA-Z0-9]*)[^>]*>(.*?)</\1>', r'\2', re.DOTALL),
    )
    try:
        cleaned = content
        for pattern, replacement, flags in cleanup_steps:
            cleaned = re.sub(pattern, replacement, cleaned, flags=flags)
        return cleaned.strip()
    except Exception as e:
        get_logger().exception(f"Unexpected exception thrown. Returning empty result.")
        return ""

class PredictionPreparator:
    """
    Callable that renders the system/user prompt templates once and then queries the AI handler
    with them for a given model.
    """

    def __init__(self, ai_handler, vars, system_prompt, user_prompt):
        """
        Render both prompt templates (Jinja2, StrictUndefined) with a deep copy of ``vars``.

        On any failure the exception is logged and ``self.ai_handler`` is set to None, so that a later
        call fails fast instead of using partially initialized prompts.
        """
        try:
            self.ai_handler = ai_handler
            render_vars = copy.deepcopy(vars)
            jinja_env = Environment(undefined=StrictUndefined)
            self.system_prompt = jinja_env.from_string(system_prompt).render(render_vars)
            self.user_prompt = jinja_env.from_string(user_prompt).render(render_vars)
        except Exception as e:
            get_logger().exception(f"Caught exception during init. Setting ai_handler to None to prevent __call__.")
            self.ai_handler = None

    #Called by retry_with_fallback_models and therefore, on any failure must throw an exception:
    async def __call__(self, model: str) -> str:
        """
        Send the rendered prompts to ``model`` and return the response text.

        Raises:
            ValueError: If initialization failed (no AI handler available).
            Exception: Any exception raised during the chat completion is logged and re-raised.
        """
        if not self.ai_handler:
            get_logger().error("ai handler not set. Cannot invoke model!")
            raise ValueError("PredictionPreparator not initialized")
        try:
            temperature = get_settings().config.temperature
            answer, _finish_reason = await self.ai_handler.chat_completion(
                model=model, temperature=temperature, system=self.system_prompt, user=self.user_prompt)
        except Exception as e:
            get_logger().exception("Caught exception during prediction.", artifacts={'system': self.system_prompt, 'user': self.user_prompt})
            raise e
        return answer


class PRHelpDocs(object):
    """
    Tool that answers a user's question based on the documentation files of a git repository.

    Flow (see run()): clone the repository, gather its documentation files, build a prompt out of them
    (falling back to a heading-based relevance ranking when the full text is too long), query the model,
    and publish or return the formatted answer.
    """
    def __init__(self, ctx_url, ai_handler:partial[BaseAiHandler,] = LiteLLMAIHandler, args: tuple[str]=None, return_as_string: bool=False):
        """
        Args:
            ctx_url: Context URL, used to obtain the git provider and, when no repo URL is configured,
                to deduce the repository URL.
            ai_handler: Callable returning the AI handler instance to use.
            args: Tool arguments; the first one (if any) is taken as the question.
            return_as_string: When True, run() returns the answer instead of publishing it.

        Any failure during initialization is logged and resets ``self.question`` to None, which turns
        run() into a no-op.
        """
        try:
            self.ctx_url = ctx_url
            self.question = args[0] if args else None
            self.return_as_string = return_as_string
            self.repo_url_given_explicitly = True
            self.repo_url = get_settings().get('PR_HELP_DOCS.REPO_URL', '')
            self.repo_desired_branch = get_settings().get('PR_HELP_DOCS.REPO_DEFAULT_BRANCH', 'main') #Ignored if self.repo_url is empty
            # NOTE(review): not(None) evaluates to True, so the None validation below cannot detect an unset
            # EXCLUDE_ROOT_README value - confirm whether that setting is meant to be validated as well.
            self.include_root_readme_file = not(get_settings()['PR_HELP_DOCS.EXCLUDE_ROOT_README'])
            self.supported_doc_exts = get_settings()['PR_HELP_DOCS.SUPPORTED_DOC_EXTS']
            self.docs_path = get_settings()['PR_HELP_DOCS.DOCS_PATH']

            retrieved_settings = [self.include_root_readme_file, self.supported_doc_exts, self.docs_path]
            if any([setting is None for setting in retrieved_settings]):
                raise Exception(f"One of the settings is invalid: {retrieved_settings}")

            self.git_provider = get_git_provider_with_context(ctx_url)
            if not self.git_provider:
                raise Exception(f"No git provider found at {ctx_url}")
            if not self.repo_url:
                self.repo_url_given_explicitly = False
                get_logger().debug(f"No explicit repo url provided, deducing it from type: {self.git_provider.__class__.__name__} "
                                  f"context url: {self.ctx_url}")
                self.repo_url = self.git_provider.get_git_repo_url(self.ctx_url)
                if not self.repo_url:
                    raise Exception(f"Unable to deduce repo url from type: {self.git_provider.__class__.__name__} url: {self.ctx_url}")
                get_logger().debug(f"deduced repo url: {self.repo_url}")
                self.repo_desired_branch = None #Inferred from the repo provider.

            self.ai_handler = ai_handler()
            self.vars = {
                "docs_url": self.repo_url,
                "question": self.question,
                "snippets": "",
            }
            self.token_handler = TokenHandler(None,
                                              self.vars,
                                              get_settings().pr_help_docs_prompts.system,
                                              get_settings().pr_help_docs_prompts.user)
        except Exception as e:
            get_logger().exception(f"Caught exception during init. Setting self.question to None to prevent run() to do anything.")
            self.question = None

    async def run(self):
        """
        Answer ``self.question`` using the repository documentation.

        Returns:
            The formatted answer string on success. None when there is no question, no usable documentation,
            the model response cannot be parsed/used, the question is flagged as not relevant, or an
            exception occurs (it is logged, not propagated).
        """
        if not self.question:
            get_logger().warning('No question provided. Will do nothing.')
            return None

        try:
            # Clone the repository and gather relevant documentation files.
            docs_filepath_to_contents = self._gen_filenames_to_contents_map_from_repo()

            #Generate prompt for the AI model. This will be the full text of all the documentation files combined.
            docs_prompt = aggregate_documentation_files_for_prompt_contents(docs_filepath_to_contents)
            if not docs_filepath_to_contents or not docs_prompt:
                get_logger().warning(f"Could not find any usable documentation. Returning with no result...")
                return None
            docs_prompt_to_send_to_model = docs_prompt

            # Estimate how many tokens will be needed.
            # In case the expected number of tokens exceeds LLM limits, retry with just headings, asking the LLM to rank according to relevance to the question.
            # Based on returned ranking, rerun but sort the documents accordingly, this time, trim in case of exceeding limit.

            #First, check if the text is not too long to even query the LLM provider:
            max_allowed_txt_input = get_maximal_text_input_length_for_token_count_estimation()
            invoke_llm_just_with_headings = self._trim_docs_input(docs_prompt_to_send_to_model, max_allowed_txt_input,
                                                                  only_return_if_trim_needed=True)
            if invoke_llm_just_with_headings:
                #Entire docs is too long. Rank and return according to relevance.
                docs_prompt_to_send_to_model = await self._rank_docs_and_return_them_as_prompt(docs_filepath_to_contents,
                                                                                               max_allowed_txt_input)

            if not docs_prompt_to_send_to_model:
                get_logger().error("Failed to generate docs prompt for model. Returning with no result...")
                return
            # At this point, either all original documents be used (if their total length doesn't exceed limits), or only those selected.
            self.vars['snippets'] = docs_prompt_to_send_to_model.strip()
            # Run the AI model and extract sections from its response
            response = await retry_with_fallback_models(PredictionPreparator(self.ai_handler, self.vars,
                                                                             get_settings().pr_help_docs_prompts.system,
                                                                             get_settings().pr_help_docs_prompts.user),
                                                        model_type=ModelType.REGULAR)
            response_yaml = load_yaml(response)
            if not response_yaml:
                get_logger().error("Failed to parse the AI response.", artifacts={'response': response})
                return
            response_str = response_yaml.get('response')
            relevant_sections = response_yaml.get('relevant_sections')
            if not response_str or not relevant_sections:
                get_logger().error("Failed to extract response/relevant sections.",
                                   artifacts={'raw_response': response, 'response_str': response_str, 'relevant_sections': relevant_sections})
                return
            if int(response_yaml.get('question_is_relevant', '1')) == 0:
                get_logger().warning(f"Question is not relevant. Returning without an answer...",
                                     artifacts={'raw_response': response})
                return

            # Format the response as markdown
            answer_str = self._format_model_answer(response_str, relevant_sections)
            if self.return_as_string: #Skip publishing
                return answer_str
            #Otherwise, publish the answer if answer is non empty and publish is not turned off:
            if answer_str and get_settings().config.publish_output:
                self.git_provider.publish_comment(answer_str)
            else:
                get_logger().info("Answer:", artifacts={'answer_str': answer_str})
            return answer_str
        except Exception as e:
            get_logger().exception('failed to provide answer to given user question as a result of a thrown exception (see above)')

    def _find_all_document_files_matching_exts(self, abs_docs_path: str, ignore_readme=False, max_allowed_files=5000) -> list[str]:
        """
        Recursively collect the files under ``abs_docs_path`` whose extension is one of ``self.supported_doc_exts``.

        Args:
            abs_docs_path: Directory to scan.
            ignore_readme: When True, 'readme.<supported ext>' files located directly in ``abs_docs_path`` are skipped.
            max_allowed_files: Scanning stops once this many matching files were collected.

        Returns:
            The matching file paths, or an empty list on an unexpected error.
        """
        try:
            matching_files = []

            # Ensure extensions don't have leading dots and are lowercase
            dotless_extensions = [ext.lower().lstrip('.') for ext in self.supported_doc_exts]
            # Computed once rather than for every visited file.
            readme_file_names = {f"readme.{ext}" for ext in dotless_extensions}

            # Walk through directory and subdirectories
            file_cntr = 0
            for root, _, files in os.walk(abs_docs_path):
                for file in files:
                    if ignore_readme and root == abs_docs_path and file.lower() in readme_file_names:
                        continue
                    # Check if file has one of the specified extensions
                    if any(file.lower().endswith(f'.{ext}') for ext in dotless_extensions):
                        file_cntr+=1
                        matching_files.append(os.path.join(root, file))
                        if file_cntr >= max_allowed_files:
                            get_logger().warning(f"Found at least {max_allowed_files} files in {abs_docs_path}, skipping the rest.")
                            return matching_files
            return matching_files
        except Exception as e:
            get_logger().exception(f"Unexpected exception thrown. Returning empty list.")
            return []

    def _gen_filenames_to_contents_map_from_repo(self) -> dict[str, str]:
        """
        Clone ``self.repo_url`` into a temporary directory and load its documentation files.

        The root README files are included when ``self.include_root_readme_file`` is set, followed by the
        supported documentation files found under ``self.docs_path``.

        Returns:
            Mapping of file path (relative to the cloned repository root) -> file contents, or an empty dict
            when cloning fails, nothing is found, or an unexpected error occurs.
        """
        try:
            with TemporaryDirectory() as tmp_dir:
                get_logger().debug(f"About to clone repository: {self.repo_url} to temporary directory: {tmp_dir}...")
                returned_cloned_repo_root = self.git_provider.clone(self.repo_url, tmp_dir, remove_dest_folder=False)
                if not returned_cloned_repo_root:
                    raise Exception(f"Failed to clone {self.repo_url} to {tmp_dir}")

                get_logger().debug(f"About to gather relevant documentation files...")
                doc_files = []
                if self.include_root_readme_file:
                    # Only files in the root directory are of interest: the first entry yielded by os.walk()
                    # describes the root itself, so there is no need to traverse the whole repository.
                    repo_root = returned_cloned_repo_root.path
                    root, _, files = next(os.walk(repo_root), (repo_root, [], []))
                    for file in files:
                        if file.lower().startswith("readme."):
                            doc_files.append(os.path.join(root, file))
                abs_docs_path = os.path.join(returned_cloned_repo_root.path, self.docs_path)
                if os.path.exists(abs_docs_path):
                    doc_files.extend(self._find_all_document_files_matching_exts(abs_docs_path,
                                                                                 ignore_readme=(self.docs_path=='.')))
                    if not doc_files:
                        get_logger().warning(f"No documentation files found matching file extensions: "
                                             f"{self.supported_doc_exts} under repo: {self.repo_url} "
                                             f"path: {self.docs_path}. Returning empty dict.")
                        return {}

                get_logger().info(f'For context {self.ctx_url} and repo: {self.repo_url}'
                                  f' will be using the following documentation files: ',
                                  artifacts={'doc_files': doc_files})

                return map_documentation_files_to_contents(returned_cloned_repo_root.path, doc_files)
        except Exception as e:
            get_logger().exception(f"Unexpected exception thrown. Returning empty dict.")
            return {}

    def _trim_docs_input(self, docs_input: str, max_allowed_txt_input: int, only_return_if_trim_needed=False) -> bool|str:
        """
        Make sure the documentation text fits both the character limit and the model's token limit.

        Args:
            docs_input: The documentation text to check.
            max_allowed_txt_input: Maximal number of characters allowed for token count estimation.
            only_return_if_trim_needed: When True, nothing is trimmed and only a bool is returned.

        Returns:
            When ``only_return_if_trim_needed`` is True: whether trimming would be required.
            Otherwise: the (possibly trimmed) text.

        Raises:
            Exception: Unexpected exceptions are logged and re-raised.
        """
        try:
            if len(docs_input) >= max_allowed_txt_input:
                get_logger().warning(
                    f"Text length: {len(docs_input)} exceeds the current returned limit of {max_allowed_txt_input} just for token count estimation. Trimming the text...")
                if only_return_if_trim_needed:
                    return True
                docs_input = docs_input[:max_allowed_txt_input]
            # Then, count the tokens in the prompt. If the count exceeds the limit, trim the text.
            token_count = self.token_handler.count_tokens(docs_input, force_accurate=True)
            get_logger().debug(f"Estimated token count of documentation to send to model: {token_count}")
            model = get_settings().config.model
            if model in MAX_TOKENS:
                # note - here we take the actual max tokens, without any reductions. we do aim to get the full documentation website in the prompt
                max_tokens_full = MAX_TOKENS[model]
            else:
                max_tokens_full = get_max_tokens(model)
            delta_output = 5000  # Elbow room to reduce chance of exceeding token limit or model paying less attention to prompt guidelines.
            if token_count > max_tokens_full - delta_output:
                if only_return_if_trim_needed:
                    return True
                docs_input = clean_markdown_content(docs_input)  # Reduce unnecessary text/images/etc.
                get_logger().info(
                    f"Token count {token_count} exceeds the limit {max_tokens_full - delta_output}. Attempting to clip text to fit within the limit...")
                docs_input = clip_tokens(docs_input, max_tokens_full - delta_output,
                                         num_input_tokens=token_count)
            if only_return_if_trim_needed:
                return False
            return docs_input
        except Exception as e:
            # Unexpected exception. Rethrowing it since:
            # 1. This is an internal function.
            # 2. An empty str/False result is a valid one - would require now checking also for None.
            get_logger().exception(f"Unexpected exception thrown. Rethrowing it...")
            raise e

    async def _rank_docs_and_return_them_as_prompt(self, docs_filepath_to_contents: dict[str, str], max_allowed_txt_input: int) -> str:
        """
        Ask the model to rank the documentation files by relevance, given only their names and headings,
        and build the prompt text out of the files it selected.

        Args:
            docs_filepath_to_contents: Mapping of file path -> file contents of all documentation files.
            max_allowed_txt_input: Maximal number of characters allowed for token count estimation.

        Returns:
            The (possibly trimmed) prompt text of the selected files, or an empty string on any failure.
        """
        try:
            #Return just file name and their headings (if exist):
            docs_prompt_to_send_to_model = (
                aggregate_documentation_files_for_prompt_contents(docs_filepath_to_contents,
                                                                  return_just_headings=True))
            # Verify list of headings does not exceed limits - trim it if it does.
            docs_prompt_to_send_to_model = self._trim_docs_input(docs_prompt_to_send_to_model, max_allowed_txt_input,
                                                                 only_return_if_trim_needed=False)
            if not docs_prompt_to_send_to_model:
                get_logger().error("_trim_docs_input returned an empty result.")
                return ""

            self.vars['snippets'] = docs_prompt_to_send_to_model.strip()
            # Run the AI model and extract sections from its response
            response = await retry_with_fallback_models(PredictionPreparator(self.ai_handler, self.vars,
                                                                             get_settings().pr_help_docs_headings_prompts.system,
                                                                             get_settings().pr_help_docs_headings_prompts.user),
                                                        model_type=ModelType.REGULAR)
            response_yaml = load_yaml(response)
            if not response_yaml:
                get_logger().error("Failed to parse the AI response.", artifacts={'response': response})
                return ""
            ranked_entries = response_yaml.get('relevant_files_ranking')
            if not ranked_entries:
                # Report the actual problem instead of failing later with a TypeError while iterating None.
                get_logger().error("AI response does not contain any relevant files ranking.",
                                   artifacts={'response': response})
                return ""
            # else: Sanitize the output so that the file names match 1:1 dictionary keys. Do this via the file index and not its name, which may be altered by the model.
            all_file_paths = list(docs_filepath_to_contents.keys())  # Built once, not once per selected index.
            valid_indices = []
            for entry in ranked_entries:
                try:
                    idx = int(entry['idx'])
                except (KeyError, TypeError, ValueError):
                    # One malformed entry should not invalidate the rest of the ranking.
                    get_logger().warning(f"Skipping malformed ranking entry: {entry}")
                    continue
                if 0 <= idx < len(all_file_paths):
                    valid_indices.append(idx)
            valid_file_paths = [all_file_paths[idx] for idx in valid_indices]
            selected_docs_dict = {file_path: docs_filepath_to_contents[file_path] for file_path in valid_file_paths}
            docs_prompt = aggregate_documentation_files_for_prompt_contents(selected_docs_dict)
            docs_prompt_to_send_to_model = docs_prompt

            # Check if the updated list of documents does not exceed limits and trim if it does:
            docs_prompt_to_send_to_model = self._trim_docs_input(docs_prompt_to_send_to_model, max_allowed_txt_input,
                                                                 only_return_if_trim_needed=False)
            if not docs_prompt_to_send_to_model:
                get_logger().error("_trim_docs_input returned an empty result.")
                return ""
            return docs_prompt_to_send_to_model
        except Exception as e:
            get_logger().exception(f"Unexpected exception thrown. Returning empty result.")
            return ""

    def _format_model_answer(self, response_str: str, relevant_sections: list[dict[str, str]]) -> str:
        """
        Format the model answer and its relevant sources as the markdown text to publish/return.

        Args:
            response_str: The answer text returned by the model.
            relevant_sections: The relevant documentation sections returned by the model.

        Returns:
            The formatted answer. When the answer is not returned as a string, a usage guide is appended if
            the git provider supports "gfm_markdown" and ``pr_help_docs.enable_help_text`` is set.
            An empty string is returned when no answer could be produced or on an unexpected error.
        """
        try:
            canonical_url_prefix, canonical_url_suffix = (
                self.git_provider.get_canonical_url_parts(repo_git_url=self.repo_url if self.repo_url_given_explicitly else None,
                                                          desired_branch=self.repo_desired_branch))
            answer_str = format_markdown_q_and_a_response(self.question, response_str, relevant_sections,
                                                          self.supported_doc_exts, canonical_url_prefix, canonical_url_suffix)
            if answer_str:
                #Remove the question phrase and replace with light bulb and a heading mentioning this is an automated answer:
                answer_str = modify_answer_section(answer_str)
            #In case the response should not be published and returned as string, stop here:
            if answer_str and self.return_as_string:
                get_logger().info(f"Chat help docs answer", artifacts={'answer_str': answer_str})
                return answer_str
            if not answer_str:
                get_logger().info(f"No answer found")
                return ""
            if self.git_provider.is_supported("gfm_markdown") and get_settings().pr_help_docs.enable_help_text:
                answer_str += "<hr>\n\n<details> <summary><strong>💡 Tool usage guide:</strong></summary><hr> \n\n"
                answer_str += HelpMessage.get_help_docs_usage_guide()
                answer_str += "\n</details>\n"
            return answer_str
        except Exception as e:
            get_logger().exception(f"Unexpected exception thrown. Returning empty result.")
            return ""


================================================
FILE: pr_agent/tools/pr_help_message.py
================================================
import copy
import re
from functools import partial
from pathlib import Path

from jinja2 import Environment, StrictUndefined

from pr_agent.algo import MAX_TOKENS
from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
from pr_agent.algo.pr_processing import retry_with_fallback_models
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import ModelType, clip_tokens, load_yaml, get_max_tokens
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import BitbucketServerProvider, GithubProvider, get_git_provider_with_context
from pr_agent.log import get_logger


def extract_header(snippet):
    """
    Build a URL fragment (e.g. "#usage-guide") from the header metadata of a snippet.

    Only the text preceding the first '===Snippet content===' marker is inspected.
    A header line is a line that (after stripping) starts with 'Header ' and contains
    a ': ' separator; the title is the text between the first and second ': '.
    The top-most header line wins.

    Args:
        snippet (str): Snippet text, optionally prefixed by 'Header ...: title' lines.

    Returns:
        str: '#' followed by the lower-cased title with spaces replaced by dashes,
             or '' when no usable header line exists.
    """
    metadata = snippet.split('===Snippet content===')[0]
    for raw_line in metadata.split('\n'):
        line = raw_line.strip()
        if not line.startswith('Header '):
            continue
        parts = line.split(': ')
        if len(parts) < 2:
            # Malformed header line without a ': ' separator - skip it instead of raising IndexError.
            continue
        title = parts[1]
        return f"#{title.lower().replace(' ', '-')}" if title else ''
    return ''

class PRHelpMessage:
    def __init__(self, pr_url: str, args=None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler, return_as_string=False):
        """
        Set up the help tool for the given PR.

        Args:
            pr_url: URL of the pull request the tool was invoked on.
            args: Optional list of words forming the user's question.
            ai_handler: Factory producing the AI handler used for chat completions.
            return_as_string: Stored on the instance as `return_as_string`.
        """
        self.git_provider = get_git_provider_with_context(pr_url)
        self.ai_handler = ai_handler()
        self.question_str = self.parse_args(args)
        self.return_as_string = return_as_string

        # Prompt variables and the token handler are only prepared when a question was asked.
        if not self.question_str:
            return

        self.vars = dict(question=self.question_str, snippets="")
        self.token_handler = TokenHandler(
            None,
            self.vars,
            get_settings().pr_help_prompts.system,
            get_settings().pr_help_prompts.user,
        )

    async def _prepare_prediction(self, model: str):
        """
        Render the help prompts with the current variables and query the model.

        Args:
            model: Name of the model to send the chat completion request to.

        Returns:
            The model response, or an empty string if anything raised along the way.
        """
        try:
            render_vars = copy.deepcopy(self.vars)
            jinja_env = Environment(undefined=StrictUndefined)
            system_prompt = jinja_env.from_string(get_settings().pr_help_prompts.system).render(render_vars)
            user_prompt = jinja_env.from_string(get_settings().pr_help_prompts.user).render(render_vars)
            answer, _finish_reason = await self.ai_handler.chat_completion(
                model=model,
                temperature=get_settings().config.temperature,
                system=system_prompt,
                user=user_prompt,
            )
            return answer
        except Exception as err:
            get_logger().error(f"Error while preparing prediction: {err}")
            return ""

    def parse_args(self, args):
        """Join the given argument words into one space-separated question; '' when there are none."""
        return " ".join(args) if (args and len(args) > 0) else ""

    def format_markdown_header(self, header: str) -> str:
        """
        Convert a markdown header line into a lower-case, dash-separated anchor string.

        Leading/trailing '#', spaces, gem emoji and newlines are stripped, a fixed set of
        punctuation characters is removed, and every remaining space becomes a dash.

        Args:
            header: The header text, e.g. "## Some Header!".

        Returns:
            The anchor text (without a leading '#'), or "" if formatting raised.
        """
        try:
            trimmed = header.strip('# 💎\n')

            # Characters mapped to None are deleted; a space turns into a dash.
            char_table = str.maketrans({**dict.fromkeys("'`(),.?!"), ' ': '-'})

            return trimmed.translate(char_table).lower()
        except Exception:
            get_logger().exception("Error while formatting markdown header", artifacts={'header': header})
            return ""


    async def run(self):
        """
        Run the help tool.

        When a question was supplied, the markdown files under the repository's
        'docs/docs' folder are concatenated into the prompt (clipped to the model's
        token limit), the model is queried, and the answer is published as a comment
        (or logged when `config.publish_output` is off) together with links to the
        relevant documentation pages.

        When no question was supplied, a walkthrough table of the available tools is
        published.

        Returns:
            str: Always an empty string; results are delivered through the git provider
                 or the logger. Any exception is logged and swallowed.
        """
        try:
            if self.question_str:
                get_logger().info(f'Answering a PR question about the PR {self.git_provider.pr_url} ')

                if not get_settings().get('openai.key'):
                    if get_settings().config.publish_output:
                        self.git_provider.publish_comment(
                            "The `Help` tool chat feature requires an OpenAI API key for calculating embeddings")
                    else:
                        get_logger().error("The `Help` tool chat feature requires an OpenAI API key for calculating embeddings")
                    return ""

                # documentation root, resolved relative to this source file
                docs_path = Path(__file__).parent.parent.parent / 'docs' / 'docs'
                # get all the 'md' files inside docs_path and its subdirectories
                md_files = list(docs_path.glob('**/*.md'))
                folders_to_exclude = ['/finetuning_benchmark/']
                # NOTE(review): entries are compared against the bare file name, so the path-like
                # entry '/docs/overview/index.md' can never match - confirm whether it is still needed.
                files_to_exclude = {'EXAMPLE_BEST_PRACTICE.md', 'compression_strategy.md', '/docs/overview/index.md'}
                md_files = [file for file in md_files
                            if not any(folder in str(file) for folder in folders_to_exclude)
                            and file.name not in files_to_exclude]

                # sort the 'md_files' so that 'priority_files' will be at the top
                priority_files_strings = ['/docs/index.md', '/usage-guide', 'tools/describe.md', 'tools/review.md',
                                          'tools/improve.md', '/faq']
                md_files_priority = [file for file in md_files if
                                     any(priority_string in str(file) for priority_string in priority_files_strings)]
                md_files_not_priority = [file for file in md_files if file not in md_files_priority]
                md_files = md_files_priority + md_files_not_priority

                docs_prompt = ""
                for file in md_files:
                    try:
                        # The documentation contains non-ASCII characters (e.g. emoji), so read it
                        # as UTF-8 explicitly instead of relying on the platform default encoding.
                        with open(file, 'r', encoding='utf-8') as f:
                            file_path = str(file).replace(str(docs_path), '')
                            docs_prompt += f"\n==file name==\n\n{file_path}\n\n==file content==\n\n{f.read().strip()}\n=========\n\n"
                    except Exception as e:
                        get_logger().error(f"Error while reading the file {file}: {e}")
                token_count = self.token_handler.count_tokens(docs_prompt)
                get_logger().debug(f"Token count of full documentation website: {token_count}")

                model = get_settings().config.model
                if model in MAX_TOKENS:
                    max_tokens_full = MAX_TOKENS[model] # note - here we take the actual max tokens, without any reductions. we do aim to get the full documentation website in the prompt
                else:
                    max_tokens_full = get_max_tokens(model)
                delta_output = 2000  # tokens kept free for the model's output
                if token_count > max_tokens_full - delta_output:
                    get_logger().info(f"Token count {token_count} exceeds the limit {max_tokens_full - delta_output}. Clipping the documentation to fit the limit.")
                    docs_prompt = clip_tokens(docs_prompt, max_tokens_full - delta_output)
                self.vars['snippets'] = docs_prompt.strip()

                # run the AI model
                response = await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.REGULAR)
                response_yaml = load_yaml(response)
                if isinstance(response_yaml, str):
                    get_logger().warning(f"failing to parse response: {response_yaml}, publishing the response as is")
                    if get_settings().config.publish_output:
                        answer_str = f"### Question: \n{self.question_str}\n\n"
                        answer_str += f"### Answer:\n\n"
                        answer_str += response_yaml
                        self.git_provider.publish_comment(answer_str)
                    return ""

                # Anything that is not a mapping (e.g. None for an empty response) carries no answer.
                if isinstance(response_yaml, dict):
                    response_str = response_yaml.get('response')
                    relevant_sections = response_yaml.get('relevant_sections')
                else:
                    response_str, relevant_sections = None, None

                # Without both an answer text and supporting sections there is nothing useful to publish.
                if not response_str or not relevant_sections:
                    get_logger().info(f"Could not find relevant answer for the question: {self.question_str}")
                    if get_settings().config.publish_output:
                        answer_str = f"### Question: \n{self.question_str}\n\n"
                        answer_str += f"### Answer:\n\n"
                        answer_str += f"Could not find relevant information to answer the question. Please provide more details and try again."
                        self.git_provider.publish_comment(answer_str)
                    return ""

                # prepare the answer
                answer_str = f"### Question: \n{self.question_str}\n\n"
                answer_str += f"### Answer:\n{str(response_str).strip()}\n\n"
                answer_str += f"#### Relevant Sources:\n\n"
                base_path = "https://qodo-merge-docs.qodo.ai/"
                for section in relevant_sections:
                    if not isinstance(section, dict):
                        continue  # skip malformed entries instead of failing the whole answer
                    # base_path already ends with '/', so drop any leading slash to avoid '//' in the link
                    file = str(section.get('file_name') or '').strip().removesuffix('.md').lstrip('/')
                    if not file:
                        continue
                    header = section.get('relevant_section_header_string')
                    if header is not None and str(header).strip():
                        markdown_header = self.format_markdown_header(str(header))
                        answer_str += f"> - {base_path}{file}#{markdown_header}\n"
                    else:
                        answer_str += f"> - {base_path}{file}\n"

                # publish the answer
                if get_settings().config.publish_output:
                    self.git_provider.publish_comment(answer_str)
                else:
                    get_logger().info(f"Answer:\n{answer_str}")
            else:
                if not isinstance(self.git_provider, BitbucketServerProvider) and not self.git_provider.is_supported("gfm_markdown"):
                    self.git_provider.publish_comment(
                        "The `Help` tool requires gfm markdown, which is not supported by your code platform.")
                    return ""

                get_logger().info('Getting PR Help Message...')
                relevant_configs = {'pr_help': dict(get_settings().pr_help),
                                    'config': dict(get_settings().config)}
                get_logger().debug("Relevant configs", artifacts=relevant_configs)
                pr_comment = "## PR Agent Walkthrough 🤖\n\n"
                pr_comment += "Welcome to the PR Agent, an AI-powered tool for automated pull request analysis, feedback, suggestions and more."
                pr_comment += "\n\nHere is a list of tools you can use to interact with the PR Agent:\n"
                base_path = "https://pr-agent-docs.codium.ai/tools"

                # The four lists below are parallel: index i describes the same tool in each of them.
                tool_names = [
                    f"[DESCRIBE]({base_path}/describe/)",
                    f"[REVIEW]({base_path}/review/)",
                    f"[IMPROVE]({base_path}/improve/)",
                    f"[UPDATE CHANGELOG]({base_path}/update_changelog/)",
                    f"[HELP DOCS]({base_path}/help_docs/)",
                    f"[ADD DOCS]({base_path}/add_docs/)",
                    f"[ASK]({base_path}/ask/)",
                    f"[GENERATE CUSTOM LABELS]({base_path}/generate_labels/)",
                ]

                descriptions = [
                    "Generates PR description - title, type, summary, code walkthrough and labels",
                    "Adjustable feedback about the PR, possible issues, security concerns, review effort and more",
                    "Code suggestions for improving the PR",
                    "Automatically updates the changelog",
                    "Answers a question regarding this repository, or a given one, based on given documentation path",
                    "Generates documentation to methods/functions/classes that changed in the PR",
                    "Answering free-text questions about the PR",
                    "Generates custom labels for the PR, based on specific guidelines defined by the user",
                ]

                commands = [
                    "`/describe`",
                    "`/review`",
                    "`/improve`",
                    "`/update_changelog`",
                    "`/help_docs`",
                    "`/add_docs`",
                    "`/ask`",
                    "`/generate_labels`",
                ]

                # "[*]" marks tools that need extra parameters and therefore have no checkbox.
                checkbox_list = [
                    " - [ ] Run <!-- /describe -->",
                    " - [ ] Run <!-- /review -->",
                    " - [ ] Run <!-- /improve -->",
                    " - [ ] Run <!-- /update_changelog -->",
                    " - [ ] Run <!-- /help_docs -->",
                    " - [ ] Run <!-- /add_docs -->",
                    "[*]",
                    "[*]",
                ]

                if isinstance(self.git_provider, GithubProvider) and not get_settings().config.get('disable_checkboxes', False):
                    pr_comment += f"<table><tr align='left'><th align='left'>Tool</th><th align='left'>Description</th><th align='left'>Trigger Interactively :gem:</th></tr>"
                    for i in range(len(tool_names)):
                        pr_comment += f"\n<tr><td align='left'>\n\n<strong>{tool_names[i]}</strong></td>\n<td>{descriptions[i]}</td>\n<td>\n\n{checkbox_list[i]}\n</td></tr>"
                    pr_comment += "</table>\n\n"
                    pr_comment += f"""\n\n(1) Note that each tool can be [triggered automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#github-app-automatic-tools-when-a-new-pr-is-opened) when a new PR is opened, or called manually by [commenting on a PR](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#online-usage)."""
                    pr_comment += f"""\n\n(2) Tools marked with [*] require additional parameters to be passed. For example, to invoke the `/ask` tool, you need to comment on a PR: `/ask "<question content>"`. See the relevant documentation for each tool for more details."""
                elif isinstance(self.git_provider, BitbucketServerProvider):
                    # only support basic commands in BBDC; the table replaces the walkthrough intro
                    pr_comment = generate_bbdc_table(tool_names[:4], descriptions[:4])
                else:
                    pr_comment += f"<table><tr align='left'><th align='left'>Tool</th><th align='left'>Command</th><th align='left'>Description</th></tr>"
                    for i in range(len(tool_names)):
                        pr_comment += f"\n<tr><td align='left'>\n\n<strong>{tool_names[i]}</strong></td><td>{commands[i]}</td><td>{descriptions[i]}</td></tr>"
                    pr_comment += "</table>\n\n"
                    pr_comment += f"""\n\nNote that each tool can be [invoked automatically](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/) when a new PR is opened, or called manually by [commenting on a PR](https://pr-agent-docs.codium.ai/usage-guide/automations_and_usage/#online-usage)."""

                if get_settings().config.publish_output:
                    self.git_provider.publish_comment(pr_comment)
        except Exception as e:
            get_logger().exception(f"Error while running PRHelpMessage: {e}")
        return ""

    async def prepare_relevant_snippets(self, sim_results):
        """
        Collect source pages, header anchors and a combined snippets string from search results.

        Each result is indexed as `result[0]` (an object exposing `metadata['source']` and
        `page_content`) and `result[1]` (read but not used here).

        Args:
            sim_results: Iterable of indexable search results as described above.

        Returns:
            tuple: (list of source pages, list of anchors produced by `extract_header`,
                    string with every snippet rendered as a numbered "Snippet N" section).
        """
        source_pages = []
        header_anchors = []
        snippet_texts = []
        for match in sim_results:
            document = match[0]
            source_page = document.metadata['source']
            text = document.page_content
            _similarity = match[1]  # the second element is currently unused
            snippet_texts.append(text)
            header_anchors.append(extract_header(text))
            source_pages.append(source_page)

        # build the snippets string
        combined_snippets = "".join(
            f"Snippet {number}:\n\n{text}\n\n" + "-------------------\n\n"
            for number, text in enumerate(snippet_texts, start=1)
        )
        return source_pages, header_anchors, combined_snippets


def generate_bbdc_table(column_arr_1, column_arr_2):
    """
    Render two parallel sequences as a two-column markdown table ("Tool" / "Description").

    The table has as many data rows as the longer sequence; missing cells of the
    shorter one are rendered empty.

    Args:
        column_arr_1: Values for the "Tool" column.
        column_arr_2: Values for the "Description" column.

    Returns:
        str: The markdown table (header row, separator row, data rows).
    """
    def cell(values, position):
        # Empty cell once the column runs out of values.
        return values[position] if position < len(values) else ""

    row_count = max(len(column_arr_1), len(column_arr_2))
    body = "".join(
        f"| {cell(column_arr_1, row)} | {cell(column_arr_2, row)} |\n"
        for row in range(row_count)
    )
    return "| Tool  | Description | \n" + "|--|--|\n" + body


================================================
FILE: pr_agent/tools/pr_line_questions.py
================================================
import argparse
import copy
from functools import partial

from jinja2 import Environment, StrictUndefined

from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
from pr_agent.algo.git_patch_processing import (
    decouple_and_convert_to_hunks_with_lines_numbers, extract_hunk_lines_from_patch)
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import ModelType
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import get_git_provider
from pr_agent.git_providers.git_provider import get_main_pr_language
from pr_agent.git_providers.github_provider import GithubProvider
from pr_agent.log import get_logger
from pr_agent.servers.help import HelpMessage

class PR_LineQuestions:
    def __init__(self, pr_url: str, args=None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
        """
        Prepare the line-questions tool for the given PR.

        Args:
            pr_url: URL of the pull request the question refers to.
            args: Optional list of words forming the user's question.
            ai_handler: Factory producing the AI handler used for chat completions.
        """
        self.question_str = self.parse_args(args)

        provider_factory = get_git_provider()
        self.git_provider = provider_factory(pr_url)
        self.main_pr_language = get_main_pr_language(
            self.git_provider.get_languages(),
            self.git_provider.get_files(),
        )

        self.ai_handler = ai_handler()
        self.ai_handler.main_pr_language = self.main_pr_language

        # Prompt variables; the hunk-related entries are filled in when the question is processed.
        self.vars = dict(
            title=self.git_provider.pr.title,
            branch=self.git_provider.get_pr_branch(),
            diff="",  # empty diff for initial calculation
            question=self.question_str,
            full_hunk="",
            selected_lines="",
            conversation_history="",
        )
        self.token_handler = TokenHandler(
            self.git_provider.pr,
            self.vars,
            get_settings().pr_line_questions_prompt.system,
            get_settings().pr_line_questions_prompt.user,
        )
        self.patches_diff = None
        self.prediction = None

    def parse_args(self, args):
        """Join the given argument words into one space-separated question; '' when there are none."""
        return " ".join(args) if (args and len(args) > 0) else ""


    async def run(self):
        """
        Answer a question about specific lines of the PR diff.

        The relevant hunk is taken from the 'ask_diff_hunk' setting when present, otherwise
        from the patch of the PR file whose name equals the 'file_name' setting. When a hunk
        is found, the model is queried and its sanitized answer is posted either as a reply
        to the comment given by the 'comment_id' setting or as a regular PR comment.

        Returns:
            str: Always an empty string.
        """
        get_logger().info('Answering a PR lines question...')

        # set conversation history if enabled
        # currently only supports GitHub provider
        if get_settings().pr_questions.use_conversation_history and isinstance(self.git_provider, GithubProvider):
            conversation_history = self._load_conversation_history()
            self.vars["conversation_history"] = conversation_history

        self.patch_with_lines = ""
        # Initialise alongside patch_with_lines so the attribute exists even when no hunk is found.
        self.selected_lines = ""
        ask_diff = get_settings().get('ask_diff_hunk', "")
        line_start = get_settings().get('line_start', '')
        line_end = get_settings().get('line_end', '')
        side = get_settings().get('side', 'RIGHT')
        file_name = get_settings().get('file_name', '')
        comment_id = get_settings().get('comment_id', '')
        if ask_diff:
            self.patch_with_lines, self.selected_lines = extract_hunk_lines_from_patch(ask_diff,
                                                                                       file_name,
                                                                                       line_start=line_start,
                                                                                       line_end=line_end,
                                                                                       side=side
                                                                                       )
        else:
            diff_files = self.git_provider.get_diff_files()
            for file in diff_files:
                if file.filename == file_name:
                    self.patch_with_lines, self.selected_lines = extract_hunk_lines_from_patch(file.patch, file.filename,
                                                                                               line_start=line_start,
                                                                                               line_end=line_end,
                                                                                               side=side)

        if not self.patch_with_lines:
            get_logger().warning(f"No diff hunk found for file '{file_name}', skipping the line question")
            return ""

        model_answer = await retry_with_fallback_models(self._get_prediction, model_type=ModelType.WEAK)
        # sanitize the answer so that no line will start with "/"
        # ("\r/" is handled as well, matching PRQuestions._prepare_pr_answer)
        model_answer_sanitized = (model_answer or "").strip()
        model_answer_sanitized = model_answer_sanitized.replace("\n/", "\n /").replace("\r/", "\r /")
        if model_answer_sanitized.startswith("/"):
            model_answer_sanitized = " " + model_answer_sanitized

        get_logger().info('Preparing answer...')
        if comment_id:
            self.git_provider.reply_to_comment_from_comment_id(comment_id, model_answer_sanitized)
        else:
            self.git_provider.publish_comment(model_answer_sanitized)

        return ""
        
    def _load_conversation_history(self) -> str:
        """Build a numbered transcript of the code review thread the question belongs to.

        The 'comment_id', 'file_name' and 'line_end' settings must all be set; otherwise an
        error is logged and nothing is loaded. Comments with an empty body and the comment
        whose id equals 'comment_id' are left out.

        Returns:
            str: Lines of the form "<n>. <author>: <body>", or "" when there is no history
                 or an error occurred.
        """
        comment_id = get_settings().get('comment_id', '')
        file_path = get_settings().get('file_name', '')
        line_number = get_settings().get('line_end', '')

        # early return if any required parameter is missing
        if not (comment_id and file_path and line_number):
            get_logger().error("Missing required parameters for conversation history")
            return ""

        try:
            history = []
            for comment in self.git_provider.get_review_thread_comments(comment_id):
                text = getattr(comment, 'body', '')
                # skip empty comments
                if not text or not text.strip():
                    continue
                # skip the current comment (it is added as the question in the prompt)
                # NOTE(review): assumes both ids share the same type - confirm against the provider
                if comment_id == comment.id:
                    continue

                commenter = comment.user
                author = commenter.login if hasattr(commenter, 'login') else 'Unknown'
                history.append((author, text))

            if not history:
                return ""

            get_logger().info(f"Loaded {len(history)} comments from the code review thread")

            # Format as numbered list, similar to get_commit_messages
            return "\n".join(
                f"{position}. {author}: {text}"
                for position, (author, text) in enumerate(history, start=1)
            )

        except Exception as e:
            get_logger().error(f"Error processing conversation history, error: {e}")
            return ""

    async def _get_prediction(self, model: str):
        """
        Render the line-question prompts for the selected hunk and query the model.

        Args:
            model: Name of the model to send the chat completion request to.

        Returns:
            The model response.
        """
        render_vars = copy.deepcopy(self.vars)
        render_vars.update(
            full_hunk=self.patch_with_lines,  # update diff
            selected_lines=self.selected_lines,
        )

        jinja_env = Environment(undefined=StrictUndefined)
        prompt_settings = get_settings().pr_line_questions_prompt
        system_prompt = jinja_env.from_string(prompt_settings.system).render(render_vars)
        user_prompt = jinja_env.from_string(get_settings().pr_line_questions_prompt.user).render(render_vars)

        # At high verbosity the rendered prompts are printed to stdout.
        if get_settings().config.verbosity_level >= 2:
            for rendered in (f"\nSystem prompt:\n{system_prompt}", f"\nUser prompt:\n{user_prompt}"):
                print(rendered)

        answer, _finish_reason = await self.ai_handler.chat_completion(
            model=model,
            temperature=get_settings().config.temperature,
            system=system_prompt,
            user=user_prompt,
        )
        return answer


================================================
FILE: pr_agent/tools/pr_questions.py
================================================
import copy
from functools import partial

from jinja2 import Environment, StrictUndefined

from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import ModelType
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import get_git_provider, GitLabProvider
from pr_agent.git_providers.git_provider import get_main_pr_language
from pr_agent.log import get_logger
from pr_agent.servers.help import HelpMessage


class PRQuestions:
    def __init__(self, pr_url: str, args=None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
        """
        Prepare the ask tool for the given PR.

        Args:
            pr_url: URL of the pull request the question refers to.
            args: Optional list of words forming the user's question.
            ai_handler: Factory producing the AI handler used for chat completions.
        """
        parsed_question = self.parse_args(args)
        self.pr_url = pr_url

        provider_factory = get_git_provider()
        self.git_provider = provider_factory(pr_url)
        self.main_pr_language = get_main_pr_language(
            self.git_provider.get_languages(),
            self.git_provider.get_files(),
        )

        self.ai_handler = ai_handler()
        self.ai_handler.main_pr_language = self.main_pr_language

        self.question_str = parsed_question
        # Prompt variables; "diff" is filled in once the PR diff has been computed.
        self.vars = dict(
            title=self.git_provider.pr.title,
            branch=self.git_provider.get_pr_branch(),
            description=self.git_provider.get_pr_description(),
            language=self.main_pr_language,
            diff="",  # empty diff for initial calculation
            questions=self.question_str,
            commit_messages_str=self.git_provider.get_commit_messages(),
        )
        self.token_handler = TokenHandler(
            self.git_provider.pr,
            self.vars,
            get_settings().pr_questions_prompt.system,
            get_settings().pr_questions_prompt.user,
        )
        self.patches_diff = None
        self.prediction = None

    def parse_args(self, args):
        """Join the given argument words into one space-separated question; '' when there are none."""
        return " ".join(args) if (args and len(args) > 0) else ""

    async def run(self):
        """
        Answer the user's free-text question about the PR.

        Publishes a temporary "Preparing answer..." comment, obtains the model prediction,
        formats it and, when `config.publish_output` is enabled, publishes the final answer
        and removes the temporary comment.

        Returns:
            str: Always an empty string.
        """
        get_logger().info(f'Answering a PR question about the PR {self.pr_url} ')
        configs_snapshot = {'pr_questions': dict(get_settings().pr_questions),
                            'config': dict(get_settings().config)}
        get_logger().debug("Relevant configs", artifacts=configs_snapshot)
        if get_settings().config.publish_output:
            self.git_provider.publish_comment("Preparing answer...", is_temporary=True)

        # identify image
        image_location = self.identify_image_in_comment()
        if image_location:
            get_logger().debug("Image path identified", artifact=image_location)

        await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.WEAK)

        answer_comment = self._prepare_pr_answer()
        get_logger().debug("PR output", artifact=answer_comment)

        # Append the collapsible usage guide only when the platform renders GFM and it is enabled.
        if self.git_provider.is_supported("gfm_markdown") and get_settings().pr_questions.enable_help_text:
            answer_comment += "<hr>\n\n<details> <summary><strong>💡 Tool usage guide:</strong></summary><hr> \n\n"
            answer_comment += HelpMessage.get_ask_usage_guide()
            answer_comment += "\n</details>\n"

        if not get_settings().config.publish_output:
            return ""
        self.git_provider.publish_comment(answer_comment)
        self.git_provider.remove_initial_comment()
        return ""

    def identify_image_in_comment(self):
        """
        Detect an image reference in the question and remember it in `self.vars['img_path']`.

        Two forms are recognised:
          * markdown image syntax: `... ![image](img_path)`
          * a direct `https://` link, when the question also contains '.png' or 'jpg'

        For a direct link only the URL itself is taken (up to the next whitespace), so a
        question that continues after the link no longer leaks into the image path. When
        several links are present, the first one containing '.png' or 'jpg' is preferred.

        Returns:
            str: The detected image path, or '' when the question contains no image.
        """
        img_path = ''
        question = self.question_str
        if '![image]' in question:
            # assuming structure:
            # /ask question ...  > ![image](img_path)
            img_path = question.split('![image]')[1].strip().strip('()')
            self.vars['img_path'] = img_path
        elif 'https://' in question and ('.png' in question or 'jpg' in question):  # direct image link
            # NOTE(review): 'jpg' is matched without a leading dot, unlike '.png' - confirm this is intended.
            # Every whitespace-delimited token holding a link, trimmed to start at 'https://'.
            links = [token[token.index('https://'):] for token in question.split() if 'https://' in token]
            image_links = [link for link in links if '.png' in link or 'jpg' in link]
            img_path = (image_links or links)[0]
            self.vars['img_path'] = img_path
        return img_path

    async def _prepare_prediction(self, model: str):
        """
        Compute the PR diff for the given model and store the model's answer in `self.prediction`.

        When no diff could be obtained, an error is logged and the prediction is set to "".
        """
        self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)
        if not self.patches_diff:
            get_logger().error("Error getting PR diff")
            self.prediction = ""
            return

        get_logger().debug("PR diff", artifact=self.patches_diff)
        self.prediction = await self._get_prediction(model)

    async def _get_prediction(self, model: str):
        """
        Render the question prompts with the current diff and query the model.

        When an image path was recorded in the prompt variables it is forwarded to the
        AI handler as `img_path`.

        Args:
            model: Name of the model to send the chat completion request to.

        Returns:
            The model response.
        """
        render_vars = copy.deepcopy(self.vars)
        render_vars["diff"] = self.patches_diff  # update diff

        jinja_env = Environment(undefined=StrictUndefined)
        system_prompt = jinja_env.from_string(get_settings().pr_questions_prompt.system).render(render_vars)
        user_prompt = jinja_env.from_string(get_settings().pr_questions_prompt.user).render(render_vars)

        # Only pass img_path when an image was identified in the question.
        image_kwargs = {}
        if 'img_path' in render_vars:
            image_kwargs['img_path'] = self.vars['img_path']

        answer, _finish_reason = await self.ai_handler.chat_completion(
            model=model,
            temperature=get_settings().config.temperature,
            system=system_prompt,
            user=user_prompt,
            **image_kwargs,
        )
        return answer

    def gitlab_protections(self, model_answer: str) -> str:
        """
        Reject model answers that contain any of a fixed list of slash quick-action commands.

        Args:
            model_answer: The (already sanitized) model answer.

        Returns:
            The answer unchanged when no listed command occurs in it as a substring;
            otherwise an error message, which is also logged.
        """
        # NOTE(review): the message below says "GitHub quick actions ... not supported in GitLab";
        # the wording looks inverted for a GitLab-only guard - confirm before changing the text.
        blocked_commands = ("/approve", "/close", "/merge", "/reopen", "/unapprove", "/title", "/assign",
                            "/copy_metadata", "/target_branch")
        for command in blocked_commands:
            if command in model_answer:
                rejection = "Model answer contains GitHub quick actions, which are not supported in GitLab"
                get_logger().error(rejection)
                return rejection
        return model_answer

    def _prepare_pr_answer(self) -> str:
        """
        Sanitize the stored prediction and wrap it in the "Ask" / "Answer" markdown layout.

        Sanitizing ensures no line of the answer starts with "/"; for a GitLab provider the
        answer is additionally passed through `gitlab_protections`.

        Returns:
            str: The markdown comment containing the question and the sanitized answer.
        """
        raw_answer = self.prediction.strip()

        # sanitize the answer so that no line will start with "/"
        safe_answer = raw_answer.replace("\n/", "\n /").replace("\r/", "\r /")
        if isinstance(self.git_provider, GitLabProvider):
            safe_answer = self.gitlab_protections(safe_answer)
        if safe_answer.startswith("/"):
            safe_answer = " " + safe_answer

        if safe_answer != raw_answer:
            get_logger().debug("Sanitized model answer",
                               artifact={"model_answer": raw_answer, "sanitized_answer": safe_answer})

        question_part = f"### **Ask**❓\n{self.question_str}\n\n"
        answer_part = f"### **Answer:**\n{safe_answer}\n\n"
        return question_part + answer_part


================================================
FILE: pr_agent/tools/pr_reviewer.py
================================================
import copy
import datetime
import traceback
from collections import OrderedDict
from functools import partial
from typing import List, Tuple

from jinja2 import Environment, StrictUndefined

from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
from pr_agent.algo.pr_processing import (add_ai_metadata_to_diff_files,
                                         get_pr_diff,
                                         retry_with_fallback_models)
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import (ModelType, PRReviewHeader,
                                 convert_to_markdown_v2, github_action_output,
                                 load_yaml, show_relevant_configurations)
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import (get_git_provider,
                                    get_git_provider_with_context)
from pr_agent.git_providers.git_provider import (IncrementalPR,
                                                 get_main_pr_language)
from pr_agent.log import get_logger
from pr_agent.servers.help import HelpMessage
from pr_agent.tools.ticket_pr_compliance_check import (
    extract_and_cache_pr_tickets, extract_tickets)


class PRReviewer:
    """
    The PRReviewer class is responsible for reviewing a pull request and generating feedback using an AI model.
    """

    def __init__(self, pr_url: str, is_answer: bool = False, is_auto: bool = False, args: list = None,
                 ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
        """
        Initialize the PRReviewer object with the necessary attributes and objects to review a pull request.

        Args:
            pr_url (str): The URL of the pull request to be reviewed.
            is_answer (bool, optional): Indicates whether the review is being done in answer mode. Defaults to False.
            is_auto (bool, optional): Indicates whether the review is being done in automatic mode. Defaults to False.
            args (list, optional): List of arguments passed to the PRReviewer class. Defaults to None.
            ai_handler (BaseAiHandler): Factory called with no arguments to build the AI handler used for
                the review. Defaults to LiteLLMAIHandler.

        Raises:
            Exception: If answer mode is requested but the git provider does not support
                "get_issue_comments".
        """
        self.git_provider = get_git_provider_with_context(pr_url)
        self.args = args
        self.incremental = self.parse_incremental(args)  # -i command
        if self.incremental and self.incremental.is_incremental:
            self.git_provider.get_incremental_commits(self.incremental)

        self.main_language = get_main_pr_language(
            self.git_provider.get_languages(), self.git_provider.get_files()
        )
        self.pr_url = pr_url
        self.is_answer = is_answer
        self.is_auto = is_auto

        if self.is_answer and not self.git_provider.is_supported("get_issue_comments"):
            raise Exception(f"Answer mode is not supported for {get_settings().config.git_provider} for now")
        self.ai_handler = ai_handler()
        self.ai_handler.main_pr_language = self.main_language
        self.patches_diff = None
        self.prediction = None
        # _get_user_answers() returns (question, answer) in that order; unpacking it the other way
        # round would feed the answer into the 'question_str' prompt variable and vice versa.
        question_str, answer_str = self._get_user_answers()
        self.pr_description, self.pr_description_files = (
            self.git_provider.get_pr_description(split_changes_walkthrough=True))
        if (self.pr_description_files and get_settings().get("config.is_auto_command", False) and
                get_settings().get("config.enable_ai_metadata", False)):
            add_ai_metadata_to_diff_files(self.git_provider, self.pr_description_files)
            get_logger().debug(f"AI metadata added to the this command")
        else:
            # Switch the setting off so the 'is_ai_metadata' prompt variable below reflects
            # that no metadata was attached.
            get_settings().set("config.enable_ai_metadata", False)
            get_logger().debug(f"AI metadata is disabled for this command")

        # Variables rendered into the review prompt templates.
        self.vars = {
            "title": self.git_provider.pr.title,
            "branch": self.git_provider.get_pr_branch(),
            "description": self.pr_description,
            "language": self.main_language,
            "diff": "",  # empty diff for initial calculation
            "num_pr_files": self.git_provider.get_num_of_files(),
            "num_max_findings": get_settings().pr_reviewer.num_max_findings,
            "require_score": get_settings().pr_reviewer.require_score_review,
            "require_tests": get_settings().pr_reviewer.require_tests_review,
            "require_estimate_effort_to_review": get_settings().pr_reviewer.require_estimate_effort_to_review,
            "require_estimate_contribution_time_cost": get_settings().pr_reviewer.require_estimate_contribution_time_cost,
            'require_can_be_split_review': get_settings().pr_reviewer.require_can_be_split_review,
            'require_security_review': get_settings().pr_reviewer.require_security_review,
            'require_todo_scan': get_settings().pr_reviewer.get("require_todo_scan", False),
            'question_str': question_str,
            'answer_str': answer_str,
            "extra_instructions": get_settings().pr_reviewer.extra_instructions,
            "commit_messages_str": self.git_provider.get_commit_messages(),
            "custom_labels": "",
            "enable_custom_labels": get_settings().config.enable_custom_labels,
            "is_ai_metadata":  get_settings().get("config.enable_ai_metadata", False),
            "related_tickets": get_settings().get('related_tickets', []),
            'duplicate_prompt_examples': get_settings().config.get('duplicate_prompt_examples', False),
            "date": datetime.datetime.now().strftime('%Y-%m-%d'),
        }

        self.token_handler = TokenHandler(
            self.git_provider.pr,
            self.vars,
            get_settings().pr_review_prompt.system,
            get_settings().pr_review_prompt.user
        )

    def parse_incremental(self, args: List[str]):
        """
        Translate the command arguments into an IncrementalPR marker.

        Incremental mode is requested only when the first argument is exactly "-i";
        a missing or empty argument list yields a non-incremental marker.
        """
        wants_incremental = True if (args and len(args) >= 1 and args[0] == "-i") else False
        return IncrementalPR(wants_incremental)

    async def run(self) -> None:
        """
        Execute the review flow for the pull request and publish the result.

        Steps, in order: skip PRs without files; apply the incremental-review gate; extract ticket
        context; optionally post a temporary "Preparing review..." comment; obtain the model
        prediction (with fallback models); render it to markdown; publish it (as a persistent
        comment when configured and not incremental) and remove the temporary comment.

        When publication is suppressed, the rendered review is stored in `get_settings().data`
        instead. Any exception is caught and logged; nothing is re-raised.
        """
        try:
            if not self.git_provider.get_files():
                get_logger().info(f"PR has no files: {self.pr_url}, skipping review")
                return None

            if self.incremental.is_incremental and not self._can_run_incremental_review():
                return None

            # if isinstance(self.args, list) and self.args and self.args[0] == 'auto_approve':
            #     get_logger().info(f'Auto approve flow PR: {self.pr_url} ...')
            #     self.auto_approve_logic()
            #     return None

            get_logger().info(f'Reviewing PR: {self.pr_url} ...')
            relevant_configs = {'pr_reviewer': dict(get_settings().pr_reviewer),
                                'config': dict(get_settings().config)}
            get_logger().debug("Relevant configs", artifacts=relevant_configs)

            # ticket extraction if exists
            await extract_and_cache_pr_tickets(self.git_provider, self.vars)

            if self.incremental.is_incremental and hasattr(self.git_provider, "unreviewed_files_set") and not self.git_provider.unreviewed_files_set:
                get_logger().info(f"Incremental review is enabled for {self.pr_url} but there are no new files")
                # The attribute may exist but hold None; fall back to an empty link target
                # instead of failing the whole run on an AttributeError.
                previous_review = getattr(self.git_provider, "previous_review", None)
                previous_review_url = getattr(previous_review, "html_url", "") or ""
                if get_settings().config.publish_output:
                    self.git_provider.publish_comment(f"Incremental Review Skipped\n"
                                    f"No files were changed since the [previous PR Review]({previous_review_url})")
                return None

            if get_settings().config.publish_output and not get_settings().config.get('is_auto_command', False):
                self.git_provider.publish_comment("Preparing review...", is_temporary=True)

            await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.REGULAR)
            if not self.prediction:
                self.git_provider.remove_initial_comment()
                return None

            pr_review = self._prepare_pr_review()
            get_logger().debug(f"PR output", artifact=pr_review)

            should_publish = get_settings().config.publish_output and self._should_publish_review_no_suggestions(pr_review)
            if not should_publish:
                reason = "Review output is not published"
                if get_settings().config.publish_output:
                    reason += ": no major issues detected."
                get_logger().info(reason)
                get_settings().data = {"artifact": pr_review}
                if get_settings().config.publish_output:
                    # A temporary "Preparing review..." comment may have been posted above;
                    # do not leave it dangling when the review itself is withheld.
                    self.git_provider.remove_initial_comment()
                return

            if not pr_review:
                # _prepare_pr_review() returns "" when the prediction could not be parsed;
                # publishing an empty comment would be useless (and may be rejected by the provider).
                get_logger().warning(f"Review for {self.pr_url} is empty, nothing to publish")
                self.git_provider.remove_initial_comment()
                return None

            # publish the review
            if get_settings().pr_reviewer.persistent_comment and not self.incremental.is_incremental:
                final_update_message = get_settings().pr_reviewer.final_update_message
                self.git_provider.publish_persistent_comment(pr_review,
                                                            initial_header=f"{PRReviewHeader.REGULAR.value} 🔍",
                                                            update_header=True,
                                                            final_update_message=final_update_message, )
            else:
                self.git_provider.publish_comment(pr_review)

            self.git_provider.remove_initial_comment()
        except Exception as e:
            # Keep the best-effort contract (never raise), but record where the failure happened.
            get_logger().error(f"Failed to review PR: {e}", artifact={"traceback": traceback.format_exc()})

    def _should_publish_review_no_suggestions(self, pr_review: str) -> bool:
        """
        Decide whether a review should be published based on the 'publish_output_no_suggestions' setting.

        When that setting (default True) is truthy the review is always eligible; otherwise it is
        eligible only if its text does not contain "No major issues detected".
        """
        publish_without_suggestions = get_settings().pr_reviewer.get('publish_output_no_suggestions', True)
        if publish_without_suggestions:
            return publish_without_suggestions
        return "No major issues detected" not in pr_review

    async def _prepare_prediction(self, model: str) -> None:
        """
        Compute the PR diff for `model` and, when it is non-empty, store the model's prediction.

        Sets `self.patches_diff` to the diff and `self.prediction` to the prediction, or to None
        (with a warning) when the diff is empty.
        """
        diff = get_pr_diff(self.git_provider,
                           self.token_handler,
                           model,
                           add_line_numbers_to_hunks=True,
                           disable_extra_lines=False)
        self.patches_diff = diff

        if not diff:
            get_logger().warning(f"Empty diff for PR: {self.pr_url}")
            self.prediction = None
            return

        get_logger().debug(f"PR diff", diff=diff)
        self.prediction = await self._get_prediction(model)

    async def _get_prediction(self, model: str) -> str:
        """
        Ask the AI handler for a review of the current diff.

        A deep copy of the prompt variables is extended with the current diff and rendered into
        the system and user review prompts (undefined template variables raise).

        Args:
            model: Name of the AI model to query.

        Returns:
            The response text returned by the AI handler; the finish reason is discarded.
        """
        prompt_vars = copy.deepcopy(self.vars)
        prompt_vars["diff"] = self.patches_diff  # update diff

        jinja_env = Environment(undefined=StrictUndefined)
        system_template = jinja_env.from_string(get_settings().pr_review_prompt.system)
        system_prompt = system_template.render(prompt_vars)
        user_template = jinja_env.from_string(get_settings().pr_review_prompt.user)
        user_prompt = user_template.render(prompt_vars)

        completion = await self.ai_handler.chat_completion(
            model=model,
            temperature=get_settings().config.temperature,
            system=system_prompt,
            user=user_prompt
        )
        response, _finish_reason = completion
        return response

    def _prepare_pr_review(self) -> str:
        """
        Prepare the PR review by processing the AI prediction and generating a markdown-formatted text that summarizes
        the feedback.

        Side effects: forwards the parsed data to `github_action_output` and updates the PR labels
        through `set_review_labels`.

        Returns:
            The markdown text of the review, or an empty string when the prediction could not be
            parsed into a mapping that contains a 'review' key.
        """
        first_key = 'review'
        last_key = 'security_concerns'
        data = load_yaml(self.prediction.strip(),
                         keys_fix_yaml=["ticket_compliance_check", "estimated_effort_to_review_[1-5]:", "security_concerns:", "key_issues_to_review:",
                                        "relevant_file:", "relevant_line:", "suggestion:"],
                         first_key=first_key, last_key=last_key)
        github_action_output(data, 'review')

        # A failed or degenerate parse may yield something that is not a mapping at all;
        # treat that the same as a missing 'review' key instead of raising on the membership test.
        if not isinstance(data, dict) or 'review' not in data:
            # Not inside an `except` block, so there is no active traceback to attach.
            get_logger().error("Failed to parse review data", artifact={"data": data})
            return ""

        # move data['review'] 'key_issues_to_review' key to the end of the dictionary
        if isinstance(data['review'], dict) and 'key_issues_to_review' in data['review']:
            key_issues_to_review = data['review'].pop('key_issues_to_review')
            data['review']['key_issues_to_review'] = key_issues_to_review

        incremental_review_markdown_text = None
        # Add incremental review section
        if self.incremental.is_incremental:
            last_commit_url = f"{self.git_provider.get_pr_url()}/commits/" \
                              f"{self.git_provider.incremental.first_new_commit_sha}"
            incremental_review_markdown_text = f"Starting from commit {last_commit_url}"

        markdown_text = convert_to_markdown_v2(data, self.git_provider.is_supported("gfm_markdown"),
                                               incremental_review_markdown_text,
                                               git_provider=self.git_provider,
                                               files=self.git_provider.get_diff_files())
        # Normalize a None/empty result right away: the `+=` operations below would raise
        # TypeError on None, so guarding only at the end of the method is too late.
        if not markdown_text:
            markdown_text = ""

        # Add help text if gfm_markdown is supported
        if self.git_provider.is_supported("gfm_markdown") and get_settings().pr_reviewer.enable_help_text:
            markdown_text += "<hr>\n\n<details> <summary><strong>💡 Tool usage guide:</strong></summary><hr> \n\n"
            markdown_text += HelpMessage.get_review_usage_guide()
            markdown_text += "\n</details>\n"

        # Output the relevant configurations if enabled
        if get_settings().get('config', {}).get('output_relevant_configurations', False):
            markdown_text += show_relevant_configurations(relevant_section='pr_reviewer')

        # Add custom labels from the review prediction (effort, security)
        self.set_review_labels(data)

        return markdown_text

    def _get_user_answers(self) -> Tuple[str, str]:
        """
        Collect the most recent question and answer messages from the PR discussion.

        Only runs in answer mode. The issue comments are scanned newest-first: a comment containing
        "Questions to better understand the PR:" is taken as the question text, any other comment
        containing '/answer' as the answer text. Scanning stops once both have been found.

        Returns:
            A tuple `(question_str, answer_str)`; each element is "" when nothing matched
            or when answer mode is off.
        """
        question_str, answer_str = "", ""
        if not self.is_answer:
            return question_str, answer_str

        comments = self.git_provider.get_issue_comments()
        for comment in comments.reversed:
            if "Questions to better understand the PR:" in comment.body:
                question_str = comment.body
            elif '/answer' in comment.body:
                answer_str = comment.body

            # Both pieces located, no need to look at older comments.
            if answer_str and question_str:
                break

        return question_str, answer_str

    def _get_previous_review_comment(self):
        """
        Fetch the previous review comment from the git provider, when it offers that capability.

        Returns:
            Whatever the provider's `get_previous_review` returns, or None when the provider has no
            such method or the lookup raised (the error is logged, not propagated).
        """
        try:
            if not hasattr(self.git_provider, "get_previous_review"):
                return None
            # A full review is requested exactly when the run is not incremental.
            return self.git_provider.get_previous_review(
                full=not self.incremental.is_incremental,
                incremental=self.incremental.is_incremental,
            )
        except Exception as e:
            get_logger().exception(f"Failed to get previous review comment, error: {e}")
            return None

    def _remove_previous_review_comment(self, comment):
        """
        Delete the given previous review comment through the git provider.

        A falsy `comment` is ignored. Failures are logged and swallowed.
        """
        try:
            if not comment:
                return
            self.git_provider.remove_comment(comment)
        except Exception as e:
            get_logger().exception(f"Failed to remove previous review comment, error: {e}")

    def _can_run_incremental_review(self) -> bool:
        """
        Checks if we can run incremental review according the various configurations and previous review.

        Two thresholds are evaluated: the number of new commits
        (`minimal_commits_for_incremental_review`) and the age of the last seen commit
        (`minimal_minutes_for_incremental_review`). With
        `require_all_thresholds_for_incremental_review` set, failing either threshold blocks the
        review; otherwise only failing both does.

        Returns:
            True when the incremental review may run, False otherwise (the reason is logged).
        """
        # checking if running is auto mode but there are no new commits
        if self.is_auto and not self.incremental.first_new_commit_sha:
            get_logger().info(f"Incremental review is enabled for {self.pr_url} but there are no new commits")
            return False

        if not hasattr(self.git_provider, "get_incremental_commits"):
            get_logger().info(f"Incremental review is not supported for {get_settings().config.git_provider}")
            return False
        # checking if there are enough commits to start the review
        num_new_commits = len(self.incremental.commits_range)
        num_commits_threshold = get_settings().pr_reviewer.minimal_commits_for_incremental_review
        not_enough_commits = num_new_commits < num_commits_threshold
        # checking if the commits are not too recent to start the review
        # Compare in UTC with timezone-aware values: a naive local "now" is skewed by the host's
        # UTC offset against UTC commit timestamps, and cannot be compared with aware ones at all.
        recent_commits_threshold = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(
            minutes=get_settings().pr_reviewer.minimal_minutes_for_incremental_review
        )
        last_seen_commit_date = None
        all_commits_too_recent = False
        if self.incremental.last_seen_commit:
            last_seen_commit_date = self.incremental.last_seen_commit.commit.author.date
            if last_seen_commit_date.tzinfo is None:
                # NOTE(review): assumes naive provider timestamps are expressed in UTC - confirm
                # against the git provider client in use.
                last_seen_commit_date = last_seen_commit_date.replace(tzinfo=datetime.timezone.utc)
            all_commits_too_recent = last_seen_commit_date > recent_commits_threshold
        # check all the thresholds or just one to start the review
        # Both flags are *failure* flags, hence any() when every threshold must pass.
        condition = any if get_settings().pr_reviewer.require_all_thresholds_for_incremental_review else all
        if condition((not_enough_commits, all_commits_too_recent)):
            get_logger().info(
                f"Incremental review is enabled for {self.pr_url} but didn't pass the threshold check to run:"
                f"\n* Number of new commits = {num_new_commits} (threshold is {num_commits_threshold})"
                f"\n* Last seen commit date = {last_seen_commit_date} (threshold is {recent_commits_threshold})"
            )
            return False
        return True

    def set_review_labels(self, data):
        """
        Derive the "Review effort N/5" and "Possible security concern" labels from the parsed review
        and publish them on the PR, replacing any older labels of those two kinds.

        Does nothing when `config.publish_output` is off. A label kind is switched off (in the
        settings object) when the matching review section was not requested. Failures are logged
        and swallowed.

        Args:
            data: Parsed review prediction; expected to hold a 'review' mapping.
        """
        if not get_settings().config.publish_output:
            return

        if not get_settings().pr_reviewer.require_estimate_effort_to_review:
            get_settings().pr_reviewer.enable_review_labels_effort = False # we did not generate this output
        if not get_settings().pr_reviewer.require_security_review:
            get_settings().pr_reviewer.enable_review_labels_security = False # we did not generate this output

        if (get_settings().pr_reviewer.enable_review_labels_security or
                get_settings().pr_reviewer.enable_review_labels_effort):
            try:
                review_data = data['review']
                review_labels = []
                if get_settings().pr_reviewer.enable_review_labels_effort:
                    # .get(): a missing effort field must not prevent the security label below.
                    estimated_effort = review_data.get('estimated_effort_to_review_[1-5]')
                    estimated_effort_number = 0
                    if isinstance(estimated_effort, str):
                        try:
                            estimated_effort_number = int(estimated_effort.split(',')[0])
                        except ValueError:
                            get_logger().warning(f"Invalid estimated_effort value: {estimated_effort}")
                    elif isinstance(estimated_effort, int) and not isinstance(estimated_effort, bool):
                        # bool is a subclass of int; a YAML true/false is not an effort score.
                        estimated_effort_number = estimated_effort
                    else:
                        get_logger().warning(f"Unexpected type for estimated_effort: {type(estimated_effort)}")
                    if 1 <= estimated_effort_number <= 5:  # 1, because ...
                        review_labels.append(f'Review effort {estimated_effort_number}/5')
                if get_settings().pr_reviewer.enable_review_labels_security and get_settings().pr_reviewer.require_security_review:
                    security_concerns = review_data.get('security_concerns')  # yes, because ...
                    # A bare yes/no/true/false may be resolved to a bool by the YAML loader, so
                    # normalize to text before matching instead of calling .lower() on it directly.
                    security_text = "" if security_concerns is None else str(security_concerns).lower()
                    security_concerns_bool = 'yes' in security_text or 'true' in security_text
                    if security_concerns_bool:
                        review_labels.append('Possible security concern')

                current_labels = self.git_provider.get_pr_labels(update=True)
                if not current_labels:
                    current_labels = []
                get_logger().debug(f"Current labels:\n{current_labels}")
                # Keep every existing label except stale review-effort / security ones.
                current_labels_filtered = [label for label in current_labels if
                                           not label.lower().startswith('review effort') and not label.lower().startswith(
                                               'possible security concern')]
                new_labels = review_labels + current_labels_filtered
                if (current_labels or review_labels) and sorted(new_labels) != sorted(current_labels):
                    get_logger().info(f"Setting review labels:\n{review_labels + current_labels_filtered}")
                    self.git_provider.publish_labels(new_labels)
                else:
                    get_logger().info(f"Review labels are already set:\n{review_labels + current_labels_filtered}")
            except Exception as e:
                get_logger().error(f"Failed to set review labels, error: {e}")

    def auto_approve_logic(self):
        """
        Approve the pull request through the git provider when auto-approval is enabled.

        With `config.enable_auto_approval` off, a comment explaining how to enable it is published
        instead. With it on, a confirmation comment is published only if the provider reports
        that the approval succeeded.
        """
        if not get_settings().config.enable_auto_approval:
            get_logger().info("Auto-approval option is disabled")
            disabled_notice = ("Auto-approval option for PR-Agent is disabled. "
                               "You can enable it via a [configuration file](https://github.com/Codium-ai/pr-agent/blob/main/docs/REVIEW.md#auto-approval-1)")
            self.git_provider.publish_comment(disabled_notice)
            return

        approved = self.git_provider.auto_approve()
        if approved:
            get_logger().info("Auto-approved PR")
            self.git_provider.publish_comment("Auto-approved PR")


================================================
FILE: pr_agent/tools/pr_similar_issue.py
================================================
import time
from enum import Enum
from typing import List

import openai
from pydantic import BaseModel, Field

from pr_agent.algo import MAX_TOKENS
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import get_max_tokens
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import get_git_provider
from pr_agent.log import get_logger

MODEL = "text-embedding-ada-002"


class PRSimilarIssue:
    def __init__(self, issue_url: str, ai_handler, args: list = None):
        self.issue_url = issue_url
        self.supported = get_settings().config.git_provider == "github"
        if not self.supported:
            return

        self.cli_mode = get_settings().CONFIG.CLI_MODE
        self.max_issues_to_scan = get_settings().pr_similar_issue.max_issues_to_scan
        self.git_provider = get_git_provider()()
        repo_name, issue_number = self.git_provider._parse_issue_url(issue_url.split('=')[-1])
        self.git_provider.repo = repo_name
        self.git_provider.repo_obj = self.git_provider.github_client.get_repo(repo_name)
        self.token_handler = TokenHandler()
        repo_obj = self.git_provider.repo_obj
        repo_name_for_index = self.repo_name_for_index = repo_obj.full_name.lower().replace('/', '-').replace('_/', '-')
        index_name = self.index_name = "codium-ai-pr-agent-issues"

        if get_settings().pr_similar_issue.vectordb == "pinecone":
            try:
                import pandas as pd
                import pinecone
                from pinecone_datasets import Dataset, DatasetMetadata
            except:
                raise Exception("Please install 'pinecone' and 'pinecone_datasets' to use pinecone as vectordb")
            # assuming pinecone api key and environment are set in secrets file
            try:
                api_key = get_settings().pinecone.api_key
                environment = get_settings().pinecone.environment
            except Exception:
                if not self.cli_mode:
                    repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1])
                    issue_main = self.git_provider.repo_obj.get_issue(original_issue_number)
                    issue_main.create_comment("Please set pinecone api key and environment in secrets file")
                raise Exception("Please set pinecone api key and environment in secrets file")

            # check if index exists, and if repo is already indexed
            run_from_scratch = False
            if run_from_scratch:  # for debugging
                pinecone.init(api_key=api_key, environment=environment)
                if index_name in pinecone.list_indexes():
                    get_logger().info('Removing index...')
                    pinecone.delete_index(index_name)
                    get_logger().info('Done')

            upsert = True
            pinecone.init(api_key=api_key, environment=environment)
            if not index_name in pinecone.list_indexes():
                run_from_scratch = True
                upsert = False
            else:
                if get_settings().pr_similar_issue.force_update_dataset:
                    upsert = True
                else:
                    pinecone_index = pinecone.Index(index_name=index_name)
                    res = pinecone_index.fetch([f"example_issue_{repo_name_for_index}"]).to_dict()
                    if res["vectors"]:
                        upsert = False

            if run_from_scratch or upsert:  # index the entire repo
                get_logger().info('Indexing the entire repo...')

                get_logger().info('Getting issues...')
                issues = list(repo_obj.get_issues(state='all'))
                get_logger().info('Done')
                self._update_index_with_issues(issues, repo_name_for_index, upsert=upsert)
            else:  # update index if needed
                pinecone_index = pinecone.Index(index_name=index_name)
                issues_to_update = []
                issues_paginated_list = repo_obj.get_issues(state='all')
                counter = 1
                for issue in issues_paginated_list:
                    if issue.pull_request:
                        continue
                    issue_str, comments, number = self._process_issue(issue)
                    issue_key = f"issue_{number}"
                    id = issue_key + "." + "issue"
                    res = pinecone_index.fetch([id]).to_dict()
                    is_new_issue = True
                    for vector in res["vectors"].values():
                        if vector['metadata']['repo'] == repo_name_for_index:
                            is_new_issue = False
                            break
                    if is_new_issue:
                        counter += 1
                        issues_to_update.append(issue)
                    else:
                        break

                if issues_to_update:
                    get_logger().info(f'Updating index with {counter} new issues...')
                    self._update_index_with_issues(issues_to_update, repo_name_for_index, upsert=True)
                else:
                    get_logger().info('No new issues to update')

        elif get_settings().pr_similar_issue.vectordb == "lancedb":
            try:
                import lancedb  # import lancedb only if needed
            except:
                raise Exception("Please install lancedb to use lancedb as vectordb")
            self.db = lancedb.connect(get_settings().lancedb.uri)
            self.table = None

            run_from_scratch = False
            if run_from_scratch:  # for debugging
                if index_name in self.db.table_names():
                    get_logger().info('Removing Table...')
                    self.db.drop_table(index_name)
                    get_logger().info('Done')

            ingest = True
            if index_name not in self.db.table_names():
                run_from_scratch = True
                ingest = False
            else:
                if get_settings().pr_similar_issue.force_update_dataset:
                    ingest = True
                else:
                    self.table = self.db[index_name]
                    res = self.table.search().limit(len(self.table)).where(f"id='example_issue_{repo_name_for_index}'").to_list()
                    get_logger().info("result: ", res)
                    if res[0].get("vector"):
                        ingest = False

            if run_from_scratch or ingest:  # indexing the entire repo
                get_logger().info('Indexing the entire repo...')

                get_logger().info('Getting issues...')
                issues = list(repo_obj.get_issues(state='all'))
                get_logger().info('Done')

                self._update_table_with_issues(issues, repo_name_for_index, ingest=ingest)
            else:  # update table if needed
                issues_to_update = []
                issues_paginated_list = repo_obj.get_issues(state='all')
                counter = 1
                for issue in issues_paginated_list:
                    if issue.pull_request:
                        continue
                    issue_str, comments, number = self._process_issue(issue)
                    issue_key = f"issue_{number}"
                    issue_id = issue_key + "." + "issue"
                    res = self.table.search().limit(len(self.table)).where(f"id='{issue_id}'").to_list()
                    is_new_issue = True
                    for r in res:
                        if r['metadata']['repo'] == repo_name_for_index:
                            is_new_issue = False
                            break
                    if is_new_issue:
                        counter += 1
                        issues_to_update.append(issue)
                    else:
                        break

                if issues_to_update:
                    get_logger().info(f'Updating index with {counter} new issues...')
                    self._update_table_with_issues(issues_to_update, repo_name_for_index, ingest=True)
                else:
                    get_logger().info('No new issues to update')

        elif get_settings().pr_similar_issue.vectordb == "qdrant":
            try:
                import qdrant_client
                from qdrant_client.models import (Distance, FieldCondition,
                                                  Filter, MatchValue,
                                                  PointStruct, VectorParams)
            except Exception:
                raise Exception("Please install qdrant-client to use qdrant as vectordb")

            api_key = None
            url = None
            try:
                api_key = get_settings().qdrant.api_key
                url = get_settings().qdrant.url
            except Exception:
                if not self.cli_mode:
                    repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1])
                    issue_main = self.git_provider.repo_obj.get_issue(original_issue_number)
                    issue_main.create_comment("Please set qdrant url and api key in secrets file")
                raise Exception("Please set qdrant url and api key in secrets file")

            self.qdrant = qdrant_client.QdrantClient(url=url, api_key=api_key)

            run_from_scratch = False
            ingest = True

            if not self.qdrant.collection_exists(collection_name=self.index_name):
                run_from_scratch = True
                ingest = False
                self.qdrant.create_collection(
                    collection_name=self.index_name,
                    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
                )
            else:
                if get_settings().pr_similar_issue.force_update_dataset:
                    ingest = True
                else:
                    response = self.qdrant.count(
                        collection_name=self.index_name,
                        count_filter=Filter(must=[
                            FieldCondition(key="metadata.repo", match=MatchValue(value=repo_name_for_index)),
                            FieldCondition(key="id", match=MatchValue(value=f"example_issue_{repo_name_for_index}")),
                        ]),
                    )
                    ingest = True if response.count == 0 else False

            if run_from_scratch or ingest:
                get_logger().info('Indexing the entire repo...')
                get_logger().info('Getting issues...')
                issues = list(repo_obj.get_issues(state='all'))
                get_logger().info('Done')
                self._update_qdrant_with_issues(issues, repo_name_for_index, ingest=ingest)
            else:
                issues_to_update = []
                issues_paginated_list = repo_obj.get_issues(state='all')
                counter = 1
                for issue in issues_paginated_list:
                    if issue.pull_request:
                        continue
                    issue_str, comments, number = self._process_issue(issue)
                    issue_key = f"issue_{number}"
                    point_id = issue_key + "." + "issue"
                    response = self.qdrant.count(
                        collection_name=self.index_name,
                        count_filter=Filter(must=[
                            FieldCondition(key="id", match=MatchValue(value=point_id)),
                            FieldCondition(key="metadata.repo", match=MatchValue(value=repo_name_for_index)),
                        ]),
                    )
                    if response.count == 0:
                        counter += 1
                        issues_to_update.append(issue)
                    else:
                        break

                if issues_to_update:
                    get_logger().info(f'Updating index with {counter} new issues...')
                    self._update_qdrant_with_issues(issues_to_update, repo_name_for_index, ingest=True)
                else:
                    get_logger().info('No new issues to update')


    async def run(self):
        """
        Find issues similar to the one referenced by ``self.issue_url`` and report them.

        The issue text is embedded with the OpenAI embedding model, the configured vector DB
        (pinecone, lancedb or qdrant) is queried for the closest records of the same repo, and
        the matches are rendered as a markdown list. The list is always logged and, when
        ``config.publish_output`` is set, posted as a comment on the original issue.

        Returns:
            "" when the tool is not supported (``self.supported`` is falsy), otherwise None.
        """
        if not self.supported:
            message = "The /similar_issue tool is currently supported only for GitHub."
            if get_settings().config.publish_output:
                try:
                    from pr_agent.git_providers import get_git_provider_with_context

                    provider = get_git_provider_with_context(self.issue_url)
                    provider.publish_comment(message)
                except Exception as e:
                    get_logger().warning(
                        "Failed to publish /similar_issue unsupported message",
                        artifact={"error": str(e)},
                    )
            return ""
        get_logger().info('Getting issue...')
        repo_name, original_issue_number = self.git_provider._parse_issue_url(self.issue_url.split('=')[-1])
        issue_main = self.git_provider.repo_obj.get_issue(original_issue_number)
        issue_str, comments, number = self._process_issue(issue_main)
        openai.api_key = get_settings().openai.key
        get_logger().info('Done')

        get_logger().info('Querying...')
        res = openai.Embedding.create(input=[issue_str], engine=MODEL)
        embeds = [record['embedding'] for record in res['data']]

        # The three lists below are kept index-aligned: entry i of each describes the same match.
        relevant_issues_number_list = []
        relevant_comment_number_list = []
        score_list = []

        def _add_match(record_id, score):
            """Parse one vector-DB hit and register it, keeping the three result lists aligned."""
            # skip example issue
            if 'example_issue_' in record_id:
                return
            try:
                issue_number = int(record_id.split('.')[0].split('_')[-1])
            except (ValueError, IndexError):
                get_logger().debug(f"Failed to parse issue number from {record_id}")
                return
            if original_issue_number == issue_number:
                return
            # Keep only the first hit per issue. Appending the comment number and score for
            # repeated hits (as was done before) shifted them against the issue list.
            if issue_number in relevant_issues_number_list:
                return
            comment_number = -1  # -1 means "the hit is the issue itself, not a comment"
            if 'comment' in record_id:
                try:
                    comment_number = int(record_id.split('.')[1].split('_')[-1])
                except (ValueError, IndexError):
                    get_logger().debug(f"Failed to parse comment number from {record_id}")
            relevant_issues_number_list.append(issue_number)
            relevant_comment_number_list.append(comment_number)
            score_list.append(str("{:.2f}".format(score)))

        vectordb = get_settings().pr_similar_issue.vectordb
        if vectordb == "pinecone":
            pinecone_index = pinecone.Index(index_name=self.index_name)
            res = pinecone_index.query(embeds[0],
                                       top_k=5,
                                       filter={"repo": self.repo_name_for_index},
                                       include_metadata=True).to_dict()
            for r in res['matches']:
                _add_match(r["id"], r['score'])
            get_logger().info('Done')

        elif vectordb == "lancedb":
            res = self.table.search(embeds[0]).where(f"metadata.repo='{self.repo_name_for_index}'", prefilter=True).to_list()
            for r in res:
                # the score is reported as 1 - distance
                _add_match(r["id"], 1 - r['_distance'])
            get_logger().info('Done')

        elif vectordb == "qdrant":
            from qdrant_client.models import FieldCondition, Filter, MatchValue
            res = self.qdrant.search(
                collection_name=self.index_name,
                query_vector=embeds[0],
                limit=5,
                query_filter=Filter(must=[FieldCondition(key="metadata.repo", match=MatchValue(value=self.repo_name_for_index))]),
                with_payload=True,
            )
            for r in res:
                # the record id is stored in the point payload
                _add_match(r.payload.get("id", ""), r.score)
            get_logger().info('Done')

        get_logger().info('Publishing response...')
        similar_issues_str = "### Similar Issues\n___\n\n"

        for i, issue_number_similar in enumerate(relevant_issues_number_list):
            issue = self.git_provider.repo_obj.get_issue(issue_number_similar)
            title = issue.title
            url = issue.html_url
            comment_number = relevant_comment_number_list[i]
            if comment_number != -1:
                issue_comments = list(issue.get_comments())
                # Record ids are built as "comment_<j + 1>", i.e. 1-based, so convert back to a
                # 0-based index. Fall back to the issue URL if the comment no longer exists.
                if 1 <= comment_number <= len(issue_comments):
                    url = issue_comments[comment_number - 1].html_url
            similar_issues_str += f"{i + 1}. **[{title}]({url})** (score={score_list[i]})\n\n"
        if get_settings().config.publish_output:
            issue_main.create_comment(similar_issues_str)
        get_logger().info(similar_issues_str)
        get_logger().info('Done')

    def _process_issue(self, issue):
        """
        Turn an issue object into the text that gets embedded.

        Returns:
            A tuple ``(issue_str, comments, number)``: the formatted header/body text, the list
            of the issue's comments (empty when ``pr_similar_issue.skip_comments`` is set) and
            the issue number.
        """
        title, body, issue_number = issue.title, issue.body, issue.number
        # Fetching comments is skipped entirely when the setting asks for it
        skip_comments = get_settings().pr_similar_issue.skip_comments
        issue_comments = [] if skip_comments else list(issue.get_comments())
        text = f"Issue Header: \"{title}\"\n\nIssue Body:\n{body}"
        return text, issue_comments, issue_number

    def _build_issues_dataframe(self, issues_list, repo_name_for_index):
        """Convert issues (and their comments) into a DataFrame of records with id/text/metadata columns."""
        import pandas as pd  # local import, as pandas is only needed when indexing

        corpus = Corpus()
        # Placeholder record; its presence is what marks this repo as already indexed
        corpus.append(Record(
            id=f"example_issue_{repo_name_for_index}",
            text="example_issue",
            metadata=Metadata(repo=repo_name_for_index)
        ))

        counter = 0
        for issue in issues_list:
            if issue.pull_request:
                continue

            counter += 1
            if counter % 100 == 0:
                get_logger().info(f"Scanned {counter} issues")
            if counter >= self.max_issues_to_scan:
                get_logger().info(f"Scanned {self.max_issues_to_scan} issues, stopping")
                break

            issue_str, comments, number = self._process_issue(issue)
            issue_key = f"issue_{number}"
            username = issue.user.login
            created_at = str(issue.created_at)
            # Cheap length check first; tokens are only counted for long texts
            if not (len(issue_str) < 8000 or
                    self.token_handler.count_tokens(issue_str) < get_max_tokens(MODEL)):
                continue

            corpus.append(Record(
                id=issue_key + "." + "issue",
                text=issue_str,
                metadata=Metadata(repo=repo_name_for_index,
                                  username=username,
                                  created_at=created_at,
                                  level=IssueLevel.ISSUE)
            ))
            for j, comment in enumerate(comments):
                comment_body = comment.body
                # Check the type BEFORE calling .split(): a non-string body would raise otherwise
                if not isinstance(comment_body, str) or len(comment_body.split()) < 10:
                    continue

                if len(comment_body) < 8000 or \
                        self.token_handler.count_tokens(comment_body) < MAX_TOKENS[MODEL]:
                    corpus.append(Record(
                        id=issue_key + ".comment_" + str(j + 1),  # comment numbers in ids are 1-based
                        text=comment_body,
                        metadata=Metadata(repo=repo_name_for_index,
                                          username=username,  # use issue username for all comments
                                          created_at=created_at,
                                          level=IssueLevel.COMMENT)
                    ))
        return pd.DataFrame(corpus.model_dump()["documents"])

    def _embed_texts(self, texts):
        """Embed a list of texts, falling back to one-by-one requests if the batch request fails."""
        openai.api_key = get_settings().openai.key
        try:
            res = openai.Embedding.create(input=texts, engine=MODEL)
            return [record['embedding'] for record in res['data']]
        except Exception:
            get_logger().error('Failed to embed entire list, embedding one by one...')

        embeds = []
        for i, text in enumerate(texts):
            try:
                res = openai.Embedding.create(input=[text], engine=MODEL)
                embeds.append(res['data'][0]['embedding'])
            except Exception:
                # Keep the row count aligned with the texts by storing a zero vector
                # (1536 is the vector size the Qdrant collection is created with)
                get_logger().warning(f"Failed to embed text #{i}, using a zero vector")
                embeds.append([0] * 1536)
        return embeds

    def _update_index_with_issues(self, issues_list, repo_name_for_index, upsert=False):
        """
        Embed the given issues and store them in the Pinecone index ``self.index_name``.

        Args:
            issues_list: iterable of issue objects; entries that are pull requests are skipped.
            repo_name_for_index: repo identifier written into every record's metadata.
            upsert: when False the index is created from these records; when True the records
                are upserted into the existing index.
        """
        get_logger().info('Processing issues...')
        df = self._build_issues_dataframe(issues_list, repo_name_for_index)
        get_logger().info('Done')

        get_logger().info('Embedding...')
        embeds = self._embed_texts(list(df["text"].values))
        df["values"] = embeds
        meta = DatasetMetadata.empty()
        # embeds is never empty: the placeholder record is always part of the corpus
        meta.dense_model.dimension = len(embeds[0])
        ds = Dataset.from_pandas(df, meta)
        get_logger().info('Done')

        api_key = get_settings().pinecone.api_key
        environment = get_settings().pinecone.environment
        if not upsert:
            get_logger().info('Creating index from scratch...')
            ds.to_pinecone_index(self.index_name, api_key=api_key, environment=environment)
            time.sleep(15)  # wait for pinecone to finalize indexing before querying
        else:
            get_logger().info('Upserting index...')
            namespace = ""
            batch_size: int = 100
            concurrency: int = 10
            pinecone.init(api_key=api_key, environment=environment)
            ds._upsert_to_index(self.index_name, namespace, batch_size, concurrency)
            time.sleep(5)  # wait for pinecone to finalize upserting before querying
        get_logger().info('Done')

    def _update_table_with_issues(self, issues_list, repo_name_for_index, ingest=False):
        """
        Embed the given issues and store them in the LanceDB table ``self.index_name``.

        Args:
            issues_list: iterable of issue objects; entries that are pull requests are skipped.
            repo_name_for_index: repo identifier written into every record's metadata.
            ingest: when False the table is (re)created from these records; when True the
                records are added to the existing table.
        """
        get_logger().info('Processing issues...')
        df = self._build_issues_dataframe(issues_list, repo_name_for_index)
        get_logger().info('Done')

        get_logger().info('Embedding...')
        df["vector"] = self._embed_texts(list(df["text"].values))
        get_logger().info('Done')

        if not ingest:
            get_logger().info('Creating table from scratch...')
            self.table = self.db.create_table(self.index_name, data=df, mode="overwrite")
            time.sleep(15)
        else:
            get_logger().info('Ingesting in Table...')
            # Rows can only be added to a table that exists. The previous check was inverted
            # and logged "doesn't exists" exactly when the table was present.
            if self.index_name in self.db.table_names():
                if getattr(self, "table", None) is None:
                    self.table = self.db.open_table(self.index_name)
                self.table.add(df)
            else:
                get_logger().info(f"Table {self.index_name} doesn't exists!")
            time.sleep(5)
        get_logger().info('Done')


    def _update_qdrant_with_issues(self, issues_list, repo_name_for_index, ingest=False):
        """
        Embed the given issues and upsert them into the Qdrant collection ``self.index_name``.

        Args:
            issues_list: iterable of issue objects; entries that are pull requests are skipped.
            repo_name_for_index: repo identifier written into every record's metadata.
            ingest: accepted for parity with the other backends; records are always upserted.
        """
        # Local imports: qdrant-client is an optional dependency
        import uuid

        from qdrant_client.models import PointStruct

        get_logger().info('Processing issues...')
        df = self._build_issues_dataframe(issues_list, repo_name_for_index)
        get_logger().info('Done')

        get_logger().info('Embedding...')
        df["vector"] = self._embed_texts(list(df["text"].values))
        get_logger().info('Done')

        get_logger().info('Upserting into Qdrant...')
        points = [
            PointStruct(
                # Deterministic point id derived from the record id, so re-indexing the same
                # record overwrites the existing point instead of duplicating it
                id=uuid.uuid5(uuid.NAMESPACE_DNS, row["id"]).hex,
                vector=row["vector"],
                payload={"id": row["id"], "text": row["text"], "metadata": row["metadata"]},
            )
            for row in df.to_dict(orient="records")
        ]
        self.qdrant.upsert(collection_name=self.index_name, points=points)
        get_logger().info('Done')


class IssueLevel(str, Enum):
    """Kind of indexed record: the issue itself or one of its comments."""
    ISSUE = "issue"
    COMMENT = "comment"


class Metadata(BaseModel):
    """Metadata attached to every record stored in the vector DB."""
    repo: str  # repo identifier the record belongs to; similarity queries filter on it
    username: str = Field(default="@codium")  # login of the issue author (also used for the issue's comments)
    created_at: str = Field(default="01-01-1970 00:00:00.00000")  # stringified creation time of the issue
    level: IssueLevel = Field(default=IssueLevel.ISSUE)  # whether the record is an issue or a comment

    class Config:
        # pydantic option: populate enum fields with the member's value rather than the enum object
        use_enum_values = True


class Record(BaseModel):
    """A single embeddable document: an issue or an issue comment."""
    id: str  # "issue_<number>.issue" or "issue_<number>.comment_<k>"; "example_issue_<repo>" for the placeholder
    text: str  # the text that gets embedded
    metadata: Metadata


class Corpus(BaseModel):
    """Collection of records that are embedded and stored together."""
    documents: List[Record] = Field(default=[])  # records in insertion order

    def append(self, r: Record):
        """Add a single record to the end of the corpus."""
        self.documents.append(r)


================================================
FILE: pr_agent/tools/pr_update_changelog.py
================================================
import copy
from datetime import date
from functools import partial
from time import sleep
from typing import Tuple

from jinja2 import Environment, StrictUndefined

from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import ModelType, show_relevant_configurations
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import GithubProvider, get_git_provider
from pr_agent.git_providers.git_provider import get_main_pr_language
from pr_agent.log import get_logger

# Number of leading CHANGELOG.md lines kept in `changelog_file_str` (the excerpt passed to the prompt)
CHANGELOG_LINES = 50


class PRUpdateChangelog:
    def __init__(self, pr_url: str, cli_mode=False, args=None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
        """
        Prepare everything needed to generate a changelog update for a PR.

        Args:
            pr_url: URL of the pull request to process.
            cli_mode: stored on the instance as ``self.cli_mode``.
            args: accepted but not used by this constructor.
            ai_handler: factory that is called without arguments to create the AI handler.
        """
        provider = get_git_provider()(pr_url)
        self.git_provider = provider
        self.main_language = get_main_pr_language(provider.get_languages(), provider.get_files())
        self.commit_changelog = get_settings().pr_update_changelog.push_changelog_changes
        # Loads self.changelog_file and self.changelog_file_str; must run before the prompt
        # variables below are assembled
        self._get_changelog_file()

        handler = ai_handler()
        handler.main_pr_language = self.main_language
        self.ai_handler = handler

        self.patches_diff = None
        self.prediction = None
        self.cli_mode = cli_mode

        # Variables rendered into the system/user prompt templates
        prompt_vars = {}
        prompt_vars["title"] = provider.pr.title
        prompt_vars["branch"] = provider.get_pr_branch()
        prompt_vars["description"] = provider.get_pr_description()
        prompt_vars["language"] = self.main_language
        prompt_vars["diff"] = ""  # empty diff for initial calculation
        prompt_vars["pr_link"] = ""
        prompt_vars["changelog_file_str"] = self.changelog_file_str
        prompt_vars["today"] = date.today()
        prompt_vars["extra_instructions"] = get_settings().pr_update_changelog.extra_instructions
        prompt_vars["commit_messages_str"] = provider.get_commit_messages()
        self.vars = prompt_vars

        prompt_settings = get_settings().pr_update_changelog_prompt
        self.token_handler = TokenHandler(provider.pr, self.vars, prompt_settings.system, prompt_settings.user)

    async def run(self):
        """
        Generate the changelog update and, when ``config.publish_output`` is set, deliver it.

        The update is either pushed to CHANGELOG.md on the PR branch (``self.commit_changelog``)
        or published as a PR comment. Nothing is generated when pushing is requested but the
        git provider has no ``create_or_update_pr_file`` method.
        """
        get_logger().info('Updating the changelog...')
        relevant_configs = {
            'pr_update_changelog': dict(get_settings().pr_update_changelog),
            'config': dict(get_settings().config),
        }
        get_logger().debug("Relevant configs", artifacts=relevant_configs)

        # check if the git provider supports pushing changelog changes
        push_requested = get_settings().pr_update_changelog.push_changelog_changes
        if push_requested and not hasattr(self.git_provider, "create_or_update_pr_file"):
            get_logger().error(
                "Pushing changelog changes is not currently supported for this code platform"
            )
            if get_settings().config.publish_output:
                self.git_provider.publish_comment(
                    "Pushing changelog changes is not currently supported for this code platform"
                )
            return

        if get_settings().config.publish_output:
            self.git_provider.publish_comment("Preparing changelog updates...", is_temporary=True)

        await retry_with_fallback_models(self._prepare_prediction, model_type=ModelType.WEAK)

        new_file_content, answer = self._prepare_changelog_update()

        # Output the relevant configurations if enabled
        show_configs = get_settings().get('config', {}).get('output_relevant_configurations', False)
        if show_configs:
            answer += show_relevant_configurations(relevant_section='pr_update_changelog')

        get_logger().debug("PR output", artifact=answer)

        if not get_settings().config.publish_output:
            return

        self.git_provider.remove_initial_comment()
        if self.commit_changelog:
            self._push_changelog_update(new_file_content, answer)
            return
        self.git_provider.publish_comment(f"**Changelog updates:** 🔄\n\n{answer}")

    async def _prepare_prediction(self, model: str):
        """Compute the PR diff for ``model`` and store the model's answer in ``self.prediction``.

        ``self.prediction`` is set to "" when no diff could be obtained.
        """
        diff = get_pr_diff(self.git_provider, self.token_handler, model)
        self.patches_diff = diff
        if not diff:
            get_logger().error("Error getting PR diff")
            self.prediction = ""
            return

        get_logger().debug("PR diff", artifact=diff)
        self.prediction = await self._get_prediction(model)

    async def _get_prediction(self, model: str):
        """Render the prompts, query the AI handler and return the cleaned-up response text.

        Returns "" when the response is empty after stripping whitespace.
        """
        prompt_vars = copy.deepcopy(self.vars)
        prompt_vars["diff"] = self.patches_diff  # update diff
        if get_settings().pr_update_changelog.add_pr_link:
            prompt_vars["pr_link"] = self.git_provider.get_pr_url()

        jinja_env = Environment(undefined=StrictUndefined)
        system_template = get_settings().pr_update_changelog_prompt.system
        system_prompt = jinja_env.from_string(system_template).render(prompt_vars)
        user_template = get_settings().pr_update_changelog_prompt.user
        user_prompt = jinja_env.from_string(user_template).render(prompt_vars)

        response, finish_reason = await self.ai_handler.chat_completion(
            model=model,
            system=system_prompt,
            user=user_prompt,
            temperature=get_settings().config.temperature,
        )

        # post-process the response
        text = response.strip()
        if not text:
            return ""
        if text.startswith("```"):
            # drop the opening fence line (e.g. "```markdown")
            text = "\n".join(text.splitlines()[1:])
        # remove any remaining backticks at either end (e.g. the closing fence)
        return text.strip("`")

    def _prepare_changelog_update(self) -> Tuple[str, str]:
        answer = self.prediction.strip().strip("```").strip()  # noqa B005
        if hasattr(self, "changelog_file"):
            existing_content = self.changelog_file
        else:
            existing_content = ""
        
        if existing_content:
            new_file_content = answer + "\n\n" + self.changelog_file
        else:
            new_file_content = answer

        if not self.commit_changelog:
            answer += "\n\n\n>to commit the new content to the CHANGELOG.md file, please type:" \
                      "\n>'/update_changelog --pr_update_changelog.push_changelog_changes=true'\n"

        return new_file_content, answer

    def _push_changelog_update(self, new_file_content, answer):
        """
        Commit ``new_file_content`` as CHANGELOG.md on the PR branch and annotate the change.

        On GitHub a review comment is attached to the updated file; if that fails for any
        reason, ``answer`` is published as a regular comment instead.
        """
        skip_ci = get_settings().pr_update_changelog.get("skip_ci_on_push", True)
        commit_message = "[skip ci] Update CHANGELOG.md" if skip_ci else "Update CHANGELOG.md"
        self.git_provider.create_or_update_pr_file(
            file_path="CHANGELOG.md",
            branch=self.git_provider.get_pr_branch(),
            contents=new_file_content,
            message=commit_message,
        )

        sleep(5)  # wait for the file to be updated
        try:
            if get_settings().config.git_provider == "github":
                pr_commits = list(self.git_provider.pr.get_commits())
                newest_commit = pr_commits[-1]
                # review comment spanning the newly added lines at the top of the file
                review_comment = {
                    "body": "CHANGELOG.md update",
                    "path": "CHANGELOG.md",
                    "line": max(2, len(answer.splitlines())),
                    "start_line": 1,
                }
                self.git_provider.pr.create_review(commit=newest_commit, comments=[review_comment])
        except Exception:
            # we can't create a review for some reason, let's just publish a comment
            self.git_provider.publish_comment(f"**Changelog updates: 🔄**\n\n{answer}")

    def _get_default_changelog(self):
        example_changelog = \
"""
Example:
## <current_date>

### Added
...
### Changed
...
### Fixed
...
"""
        return example_changelog

    def _get_changelog_file(self):
        """
        Load CHANGELOG.md from the PR branch into ``self.changelog_file`` and
        ``self.changelog_file_str``.

        ``self.changelog_file`` holds the full text and ``self.changelog_file_str`` its first
        ``CHANGELOG_LINES`` lines. If loading fails both are set to "". If the file was read
        but the excerpt is empty, the default example changelog is used as the excerpt.
        """
        try:
            branch = self.git_provider.get_pr_branch()
            content = self.git_provider.get_pr_file_content("CHANGELOG.md", branch)
            if isinstance(content, bytes):
                content = content.decode('utf-8')
            excerpt = "\n".join(content.splitlines()[:CHANGELOG_LINES])
        except Exception as e:
            get_logger().warning(f"Error getting changelog file: {e}")
            self.changelog_file_str = ""
            self.changelog_file = ""
            return

        self.changelog_file = content
        self.changelog_file_str = excerpt if excerpt else self._get_default_changelog()


================================================
FILE: pr_agent/tools/ticket_pr_compliance_check.py
================================================
import re
import traceback

from pr_agent.config_loader import get_settings
from pr_agent.git_providers import GithubProvider
from pr_agent.git_providers import AzureDevopsProvider
from pr_agent.log import get_logger

# Compile the regex patterns once, at import time, outside the functions that use them.
# GITHUB_TICKET_PATTERN has three alternatives (capture groups as seen via findall):
#   group 1:    full issue URL on a host starting with "github", e.g. https://github.com/owner/repo/issues/123
#   groups 2-5: shorthand owner/repo#123 (2 = whole shorthand, 3 = owner, 4 = repo, 5 = issue number)
#   group 6:    bare reference such as #123
GITHUB_TICKET_PATTERN = re.compile(
     r'(https://github[^/]+/[^/]+/[^/]+/issues/\d+)|(\b(\w+)/(\w+)#(\d+)\b)|(#\d+)'
)
# Option A: issue number (1-6 digits) at the start of the branch name or right after a "/",
# followed by "-" or the end of the string (e.g. feature/1-test-issue, 123-fix)
BRANCH_ISSUE_PATTERN = re.compile(r"(?:^|/)(\d{1,6})(?=-|$)")

def find_jira_tickets(text):
    """
    Collect the distinct JIRA-style ticket keys (e.g. PROJ-123) found in `text`.

    Two patterns are applied: a word-bounded key, and a key optionally preceded by a
    `.../browse/` URL prefix. The result is a list of unique keys in no particular order.
    """
    ticket_regexes = (
        r'\b[A-Z]{2,10}-\d{1,7}\b',  # Standard JIRA ticket format (e.g., PROJ-123)
        r'(?:https?://[^\s/]+/browse/)?([A-Z]{2,10}-\d{1,7})\b',  # JIRA URL or just the ticket
    )

    def _as_ticket(found):
        # re.findall yields tuples for multi-group patterns; keep the last non-empty group
        if isinstance(found, tuple):
            return next((part for part in reversed(found) if part), None)
        return found

    unique_tickets = set()
    for regex in ticket_regexes:
        for found in re.findall(regex, text):
            candidate = _as_ticket(found)
            if not candidate:
                continue
            unique_tickets.add(candidate)

    return list(unique_tickets)


def extract_ticket_links_from_pr_description(pr_description, repo_path, base_url_html='https://github.com'):
    """
    Extract all ticket links from PR description

    Recognised forms (see GITHUB_TICKET_PATTERN): full issue URLs, `owner/repo#123` shorthand,
    and bare `#123` references (resolved against `repo_path`).

    Args:
        pr_description: Text of the PR description. Empty or None yields an empty list.
        repo_path: `owner/repo` used to resolve bare `#123` references; when falsy, such
            references are skipped.
        base_url_html: Base HTML URL used to build links for shorthand and bare references.

    Returns:
        A list of unique issue URLs in order of first appearance, limited to the first 3.
        On an unexpected error the links collected so far are returned and the error is logged.
    """
    max_tickets = 3
    if not pr_description:
        # Nothing to scan; avoids logging a spurious error for a missing description
        return []

    # dict keys act as an insertion-ordered set, so results (and the truncation below) are deterministic
    found_links = {}
    try:
        # Use the updated pattern to find matches
        matches = GITHUB_TICKET_PATTERN.findall(pr_description)

        for match in matches:
            if match[0]:  # Full URL match
                found_links[match[0]] = None
            elif match[1]:  # Shorthand notation match: owner/repo#issue_number
                owner, repo, issue_number = match[2], match[3], match[4]
                found_links[f'{base_url_html.strip("/")}/{owner}/{repo}/issues/{issue_number}'] = None
            else:  # #123 format
                issue_number = match[5][1:]  # remove #
                # Bare references with 5 or more digits are ignored
                if issue_number.isdigit() and len(issue_number) < 5 and repo_path:
                    found_links[f'{base_url_html.strip("/")}/{repo_path}/issues/{issue_number}'] = None

        if len(found_links) > max_tickets:
            get_logger().info(f"Too many tickets found in PR description: {len(found_links)}")
    except Exception as e:
        get_logger().error(f"Error extracting tickets error= {e}",
                           artifact={"traceback": traceback.format_exc()})

    # Keep the first tickets mentioned rather than an arbitrary subset
    return list(found_links)[:max_tickets]

def extract_ticket_links_from_branch_name(branch_name, repo_path, base_url_html="https://github.com"):
    """
    Build GitHub issue URLs from issue numbers embedded in a branch name.

    With the default pattern a number counts when it sits at the start of the branch name or right
    after a "/", and is followed by "-" or the end of the name (feature/1-test-issue -> issue 1).
    The `extract_issue_from_branch` setting switches the feature off, and `branch_issue_regex`
    supplies a custom pattern whose first capturing group is the issue number; both settings are
    looked up at top level and under `config.`.

    Returns a list of unique issue URLs, or an empty list when the input is unusable, the feature
    is disabled, or the custom regex fails to compile.
    """
    if not branch_name or not repo_path or not isinstance(branch_name, str):
        return []

    settings = get_settings()
    extraction_enabled = settings.get(
        "extract_issue_from_branch", settings.get("config.extract_issue_from_branch", True)
    )
    if not extraction_enabled:
        return []

    custom_regex_str = settings.get("branch_issue_regex") or settings.get("config.branch_issue_regex", "") or ""
    if not custom_regex_str:
        issue_pattern = BRANCH_ISSUE_PATTERN
    else:
        try:
            issue_pattern = re.compile(custom_regex_str)
        except re.error as e:
            get_logger().error(f"Invalid custom regex for branch issue extraction: {e}")
            return []
        if issue_pattern.groups < 1:
            # A pattern without a capturing group cannot yield an issue number
            get_logger().error(
                "branch_issue_regex must contain at least one capturing group for the issue number; using default pattern."
            )
            issue_pattern = BRANCH_ISSUE_PATTERN

    issue_links = set()
    for found in issue_pattern.finditer(branch_name):
        try:
            number = found.group(1)
        except IndexError:
            continue
        if not number or not number.isdigit():
            continue
        issue_links.add(f"{base_url_html.strip('/')}/{repo_path}/issues/{number}")
    return list(issue_links)


async def extract_tickets(git_provider):
    """
    Collect the content of tickets linked to the current PR.

    For a GithubProvider, ticket links are taken from the PR description and the branch name,
    de-duplicated (description links first), limited to 3, and each issue is fetched together
    with its sub-issues and labels. For an AzureDevopsProvider, the linked work items are used.

    Returns:
        A list of dicts describing the tickets. Returns None (implicitly) when the GitHub PR has
        no ticket links, when the provider is of another type, or when an unexpected error is
        caught and logged.
    """
    # Ticket and sub-issue bodies longer than this are truncated and suffixed with "..."
    MAX_TICKET_CHARACTERS = 10000
    try:
        if isinstance(git_provider, GithubProvider):
            user_description = git_provider.get_user_description()
            description_tickets = extract_ticket_links_from_pr_description(
                user_description, git_provider.repo, git_provider.base_url_html
            )
            branch_name = git_provider.get_pr_branch()
            branch_tickets = extract_ticket_links_from_branch_name(
                branch_name, git_provider.repo, git_provider.base_url_html
            )
            # Order-preserving de-duplication: description links come before branch links
            seen = set()
            merged = []
            for link in description_tickets + branch_tickets:
                if link not in seen:
                    seen.add(link)
                    merged.append(link)
            if len(merged) > 3:
                get_logger().info(f"Too many tickets (description + branch): {len(merged)}")
                tickets = merged[:3]
            else:
                tickets = merged
            tickets_content = []

            if tickets:

                for ticket in tickets:
                    # NOTE(review): repo_name is parsed but not used below; the issue is always fetched from
                    # git_provider.repo_obj — confirm this is intended for links that point to another repository.
                    repo_name, original_issue_number = git_provider._parse_issue_url(ticket)

                    try:
                        issue_main = git_provider.repo_obj.get_issue(original_issue_number)
                    except Exception as e:
                        get_logger().error(f"Error getting main issue: {e}",
                                           artifact={"traceback": traceback.format_exc()})
                        # Skip tickets that cannot be fetched; the remaining ones are still processed
                        continue

                    issue_body_str = issue_main.body or ""
                    if len(issue_body_str) > MAX_TICKET_CHARACTERS:
                        issue_body_str = issue_body_str[:MAX_TICKET_CHARACTERS] + "..."

                    # Extract sub-issues
                    sub_issues_content = []
                    try:
                        sub_issues = git_provider.fetch_sub_issues(ticket)
                        for sub_issue_url in sub_issues:
                            try:
                                sub_repo, sub_issue_number = git_provider._parse_issue_url(sub_issue_url)
                                sub_issue = git_provider.repo_obj.get_issue(sub_issue_number)

                                sub_body = sub_issue.body or ""
                                if len(sub_body) > MAX_TICKET_CHARACTERS:
                                    sub_body = sub_body[:MAX_TICKET_CHARACTERS] + "..."

                                sub_issues_content.append({
                                    'ticket_url': sub_issue_url,
                                    'title': sub_issue.title,
                                    'body': sub_body
                                })
                            except Exception as e:
                                # A failing sub-issue is only logged; the other sub-issues are still collected
                                get_logger().warning(f"Failed to fetch sub-issue content for {sub_issue_url}: {e}")

                    except Exception as e:
                        get_logger().warning(f"Failed to fetch sub-issues for {ticket}: {e}")

                    # Extract labels
                    labels = []
                    try:
                        for label in issue_main.labels:
                            # Labels may be objects exposing `.name` or plain values
                            labels.append(label.name if hasattr(label, 'name') else label)
                    except Exception as e:
                        get_logger().error(f"Error extracting labels error= {e}",
                                           artifact={"traceback": traceback.format_exc()})

                    tickets_content.append({
                        'ticket_id': issue_main.number,
                        'ticket_url': ticket,
                        'title': issue_main.title,
                        'body': issue_body_str,
                        'labels': ", ".join(labels),
                        'sub_issues': sub_issues_content  # Store sub-issues content
                    })

                return tickets_content

        elif isinstance(git_provider, AzureDevopsProvider):
            tickets_info = git_provider.get_linked_work_items()
            tickets_content = []
            for ticket in tickets_info:
                try:
                    ticket_body_str = ticket.get("body", "")
                    if len(ticket_body_str) > MAX_TICKET_CHARACTERS:
                        ticket_body_str = ticket_body_str[:MAX_TICKET_CHARACTERS] + "..."

                    tickets_content.append(
                        {
                            "ticket_id": ticket.get("id"),
                            "ticket_url": ticket.get("url"),
                            "title": ticket.get("title"),
                            "body": ticket_body_str,
                            "requirements": ticket.get("acceptance_criteria", ""),
                            "labels": ", ".join(ticket.get("labels", [])),
                        }
                    )
                except Exception as e:
                    # A malformed work item is logged and skipped
                    get_logger().error(
                        f"Error processing Azure DevOps ticket: {e}",
                        artifact={"traceback": traceback.format_exc()},
                    )
            return tickets_content

    except Exception as e:
        get_logger().error(f"Error extracting tickets error= {e}",
                           artifact={"traceback": traceback.format_exc()})


async def extract_and_cache_pr_tickets(git_provider, vars):
    """
    Populate `vars['related_tickets']` with the tickets linked to the PR.

    Does nothing unless `pr_reviewer.require_ticket_analysis_review` is enabled. Tickets already
    stored under the `related_tickets` setting are reused; otherwise they are extracted via
    `extract_tickets`, flattened so that each ticket's sub-issues precede the ticket itself,
    and stored back into the settings.
    """
    if not get_settings().get('pr_reviewer.require_ticket_analysis_review', False):
        return

    related_tickets = get_settings().get('related_tickets', [])

    # Cached tickets take precedence over a fresh extraction
    if related_tickets:
        get_logger().info("Using cached tickets", artifact={"tickets": related_tickets})
        vars['related_tickets'] = related_tickets
        return

    tickets_content = await extract_tickets(git_provider)
    if not tickets_content:
        return

    for main_ticket in tickets_content:
        # Store sub-issues along with main issues (sub-issues first)
        sub_issue_entries = main_ticket["sub_issues"] if "sub_issues" in main_ticket else None
        if sub_issue_entries:
            for entry in sub_issue_entries:
                related_tickets.append(entry)
        related_tickets.append(main_ticket)

    get_logger().info("Extracted tickets and sub-issues from PR description",
                      artifact={"tickets": related_tickets})

    vars['related_tickets'] = related_tickets
    get_settings().set('related_tickets', related_tickets)


def check_tickets_relevancy():
    """Return True unconditionally; no relevancy check is performed here."""
    return True


================================================
FILE: pr_compliance_checklist.yaml
================================================
pr_compliances:
  - title: "Consistent Naming Conventions"
    compliance_label: false
    objective: "All new variables, functions, and classes must follow the project's established naming standards"
    success_criteria: "All identifiers follow the established naming patterns (camelCase, snake_case, etc.)"
    failure_criteria: "Inconsistent or non-standard naming that deviates from project conventions"

  - title: "No Dead or Commented-Out Code"
    compliance_label: false
    objective: "Keep the codebase clean by ensuring all submitted code is active and necessary"
    success_criteria: "All code in the PR is active and serves a purpose; no commented-out blocks"
    failure_criteria: "Presence of unused, dead, or commented-out code sections"

  - title: "Robust Error Handling"
    compliance_label: false
    objective: "Ensure potential errors and edge cases are anticipated and handled gracefully throughout the code"
    success_criteria: "All error scenarios are properly caught and handled with appropriate responses"
    failure_criteria: "Unhandled exceptions, ignored errors, or missing edge case handling"

  - title: "Single Responsibility for Functions"
    compliance_label: false
    objective: "Each function should have a single, well-defined responsibility"
    success_criteria: "Functions perform one cohesive task with a single purpose"
    failure_criteria: "Functions that combine multiple unrelated operations or handle several distinct concerns"

  - title: "When relevant, utilize early return"
    compliance_label: false
    objective: "In a code snippet containing multiple logic conditions (such as 'if-else'), prefer an early return on edge cases over deep nesting"
    success_criteria: "When relevant, utilize early return that reduces nesting"
    failure_criteria: "Unjustified deep nesting that can be simplified by early return"


================================================
FILE: pyproject.toml
================================================
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "pr-agent"
version = "0.3.1"

authors = [{ name = "QodoAI", email = "ofir.f@qodo.ai" }]

maintainers = [
  { name = "Ofir Friedman", email = "ofir.f@qodo.ai" },
]

description = "QodoAI PR-Agent aims to help efficiently review and handle pull requests, by providing AI feedbacks and suggestions."
readme = "README.md"
requires-python = ">=3.12"
keywords = ["AI", "Agents", "Pull Request", "Automation", "Code Review"]
license = { file = "LICENSE" }

classifiers = [
  "Intended Audience :: Developers",
  "Programming Language :: Python :: 3",
]
dynamic = ["dependencies"]


[tool.setuptools.dynamic]
dependencies = { file = ["requirements.txt"] }

[project.urls]
"Homepage" = "https://github.com/qodo-ai/pr-agent"
"Documentation" = "https://qodo-merge-docs.qodo.ai/"

[tool.setuptools]
include-package-data = true

[tool.setuptools.packages.find]
where = ["."]
include = [
  "pr_agent*",
] # include pr_agent and any sub-packages it finds under it.

[project.scripts]
pr-agent = "pr_agent.cli:run"

[tool.ruff]
line-length = 120

lint.select = [
  "E",    # pycodestyle errors
  "F",    # Pyflakes
  "B",    # flake8-bugbear
  "I001", # isort basic checks
  "I002", # isort missing-required-import
]

# First commit - only fixing isort
lint.fixable = [
  "I001", # isort basic checks
]

lint.unfixable = [
  "B", # Avoid trying to fix flake8-bugbear (`B`) violations.
]

lint.exclude = ["api/code_completions"]

lint.ignore = ["E999", "B008"]

[tool.ruff.lint.per-file-ignores]
"__init__.py" = [
  "E402",
] # Ignore `E402` (module-level import not at top of file) in all `__init__.py` files.

[tool.bandit]
exclude_dirs = ["tests"]
skips = ["B101"]
tests = []

[tool.pytest.ini_options]
asyncio_mode = "auto"
testpaths = ["tests"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
addopts = "--color=yes"
console_output_style = "progress"


================================================
FILE: requirements-dev.txt
================================================
pytest==9.0.2
pytest-asyncio>=1.3.0
poetry
twine
pre-commit>=4,<5


================================================
FILE: requirements.txt
================================================
aiohttp==3.12.15
anthropic>=0.69.0
#anthropic[vertex]==0.47.1
atlassian-python-api==3.41.4
azure-devops==7.1.0b4
azure-identity==1.25.0
boto3==1.40.45
certifi==2024.8.30
dynaconf==3.2.4
fastapi==0.118.0
GitPython==3.1.41
google-cloud-aiplatform==1.38.0
google-generativeai==0.8.3
google-cloud-storage==2.10.0
Jinja2==3.1.6
litellm==1.81.12
loguru==0.7.2
msrest==0.7.1
openai>=1.55.3
pytest==9.0.2
pytest-asyncio>=1.3.0
PyGithub==1.59.*
PyJWT==2.10.1
PyYAML==6.0.1
python-gitlab==3.15.0
retry==0.9.2
starlette-context==0.3.6
tiktoken==0.8.0
ujson==5.8.0
uvicorn==0.22.0
tenacity==8.2.3
gunicorn==23.0.0
pytest-cov==7.0.0
pydantic==2.8.2
html2text==2024.2.26
giteapy==1.0.8
# Uncomment the following lines to enable the 'similar issue' tool
# pinecone-client
# pinecone-datasets @ git+https://github.com/mrT23/pinecone-datasets.git@main
# lancedb==0.5.1
# qdrant-client==1.15.1
# Uncomment the following lines to enable LangChainOpenAIHandler support
# langchain==0.2.0
# langchain-core==0.2.28
# langchain-openai==0.1.20


================================================
FILE: setup.py
================================================
# for compatibility with legacy tools
# see: https://setuptools.pypa.io/en/latest/userguide/pyproject_config.html
from setuptools import setup

# Called without arguments on purpose; the project configuration is presumably taken from
# pyproject.toml (see the link above) — confirm before adding arguments here.
setup()


================================================
FILE: tests/e2e_tests/e2e_utils.py
================================================
# Path (inside the test repository) of the file that the e2e tests modify to produce a PR diff
FILE_PATH = "pr_agent/cli_pip.py"

# Expected beginnings of the tool outputs; presumably compared against the PR body/comments
# by the e2e tests — verify against the callers.
PR_HEADER_START_WITH = '### **User description**\nupdate cli_pip.py\n\n\n___\n\n### **PR Type**'
REVIEW_START_WITH = '## PR Reviewer Guide 🔍\n\n<table>\n<tr><td>⏱️&nbsp;<strong>Estimated effort to review</strong>:'
IMPROVE_START_WITH_REGEX_PATTERN = r'^## PR Code Suggestions ✨\n\n<!-- [a-z0-9]+ -->\n\n<table><thead><tr><td>Category</td>'

# Number of polling iterations; the e2e tests sleep 60 seconds per iteration
NUM_MINUTES = 5

# Replacement content written to FILE_PATH in the test branch.
# NOTE(review): the "ghs_..." value looks like a deliberately fake token — confirm.
NEW_FILE_CONTENT = """\
from pr_agent import cli
from pr_agent.config_loader import get_settings


def main():
    # Fill in the following values
    provider = "github"  # GitHub provider
    user_token = "..."  # GitHub user token
    openai_key = "ghs_afsdfasdfsdf"  # Example OpenAI key
    pr_url = "..."  # PR URL, for example 'https://github.com/Codium-ai/pr-agent/pull/809'
    command = "/improve"  # Command to run (e.g. '/review', '/describe', 'improve', '/ask="What is the purpose of this PR?"')

    # Setting the configurations
    get_settings().set("CONFIG.git_provider", provider)
    get_settings().set("openai.key", openai_key)
    get_settings().set("github.user_token", user_token)

    # Run the command. Feedback will appear in GitHub PR comments
    output = cli.run_command(pr_url, command)

    print(output)

if __name__ == '__main__':
    main()
"""


================================================
FILE: tests/e2e_tests/langchain_ai_handler.py
================================================
import asyncio
import os
import time

from pr_agent.algo.ai_handlers.langchain_ai_handler import LangChainOpenAIHandler
from pr_agent.config_loader import get_settings


def check_settings():
    """
    Verify that the loaded settings expose an `openai` section with a `key` attribute.

    Prints a short status message for each step and returns True when both are present,
    False otherwise.
    """
    print('Checking settings...')
    settings = get_settings()

    # Sentinel distinguishes "attribute missing" from any legitimate value
    absent = object()

    openai_section = getattr(settings, 'openai', absent)
    if openai_section is absent:
        print('OpenAI settings not found')
        return False

    if getattr(openai_section, 'key', absent) is absent:
        print('OpenAI API key not found')
        return False

    print('OpenAI API key found')
    return True

async def measure_performance(handler, num_requests=3):
    """
    Fire `num_requests` concurrent chat completions and print simple timing statistics.

    Args:
        handler: Object exposing an awaitable `chat_completion(model=, system=, user=, temperature=)`.
        num_requests: Number of concurrent requests to send. Zero is allowed and yields an empty result.

    Returns:
        The list of responses, in request order, as produced by `asyncio.gather`.
    """
    print(f'\nRunning performance test with {num_requests} requests...')
    # perf_counter is monotonic, so the interval cannot be skewed by system clock adjustments
    start_time = time.perf_counter()

    # Create multiple requests
    tasks = [
        handler.chat_completion(
            model='gpt-3.5-turbo',
            system='You are a helpful assistant',
            user=f'Test message {i}',
            temperature=0.2
        ) for i in range(num_requests)
    ]

    # Execute requests concurrently
    responses = await asyncio.gather(*tasks)

    total_time = time.perf_counter() - start_time
    # Guard both divisions: no requests, or an interval too short to measure
    avg_time = total_time / num_requests if num_requests > 0 else 0.0
    requests_per_second = num_requests / total_time if total_time > 0 else 0.0

    print('Performance results:')
    print(f'Total time: {total_time:.2f} seconds')
    print(f'Average time per request: {avg_time:.2f} seconds')
    print(f'Requests per second: {requests_per_second:.2f}')

    return responses

async def test():
    """
    Manual smoke test for LangChainOpenAIHandler.

    Checks the settings, sends one chat completion (including an `img_path` argument), then runs
    the concurrent performance measurement. Any exception is printed rather than propagated.
    """
    print('Starting test...')

    # Nothing can be exercised without the OpenAI configuration
    settings_ok = check_settings()
    if not settings_ok:
        print('Please set up your environment variables or configuration file')
        print('Required: OPENAI_API_KEY')
        return

    try:
        handler = LangChainOpenAIHandler()
        print('Handler created')

        # Basic functionality test
        request_kwargs = {
            'model': 'gpt-3.5-turbo',
            'system': 'You are a helpful assistant',
            'user': 'Hello',
            'temperature': 0.2,
            'img_path': 'test.jpg',
        }
        reply = await handler.chat_completion(**request_kwargs)
        print('Response:', reply)

        # Performance test
        await measure_performance(handler)

    except Exception as exc:
        print('Error:', str(exc))
        print('Error type:', type(exc))
        if hasattr(exc, '__dict__'):
            print('Error details:', exc.__dict__)
        else:
            print('Error details:', 'No additional details')

if __name__ == '__main__':
    # Report which OpenAI-related environment variables are visible.
    # Only 'Set' / 'Not set' is printed for the API key, never its value.
    print('Environment variables:')
    print('OPENAI_API_KEY:', 'Set' if os.getenv('OPENAI_API_KEY') else 'Not set')
    print('OPENAI_API_TYPE:', os.getenv('OPENAI_API_TYPE', 'Not set'))
    print('OPENAI_API_BASE:', os.getenv('OPENAI_API_BASE', 'Not set'))

    # Run the async smoke test to completion
    asyncio.run(test())
  
    
================================================
FILE: tests/e2e_tests/test_bitbucket_app.py
================================================
import hashlib
import os
import re
import time
from datetime import datetime

import jwt
import requests
from atlassian.bitbucket import Cloud
from requests.auth import HTTPBasicAuth

from pr_agent.config_loader import get_settings
from pr_agent.log import get_logger, setup_logger
from tests.e2e_tests.e2e_utils import (
    FILE_PATH,
    IMPROVE_START_WITH_REGEX_PATTERN,
    NEW_FILE_CONTENT,
    NUM_MINUTES,
    PR_HEADER_START_WITH,
    REVIEW_START_WITH,
)

# Logging is configured at import time; the level comes from the LOG_LEVEL environment
# variable and defaults to "INFO".
log_level = os.environ.get("LOG_LEVEL", "INFO")
setup_logger(log_level)
logger = get_logger()

def test_e2e_run_bitbucket_app():
    """
    End-to-end test for the Bitbucket app.

    Creates a branch and a pull request in the `codiumai/pr-agent-tests` repository, then polls the
    PR comments once a minute (up to NUM_MINUTES times) until at least five comments exist, one of
    which must start with the reviewer-guide header. The PR is declined and the branch deleted
    afterwards, whether the test passes or fails.

    Raises:
        AssertionError: if the tool results do not appear within NUM_MINUTES minutes.
        Exception: any other failure is logged and re-raised with its original traceback.
    """
    repo_slug = 'pr-agent-tests'
    project_key = 'codiumai'
    base_branch = "main"  # or any base branch you want
    new_branch = f"bitbucket_app_e2e_test-{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}"
    get_settings().config.git_provider = "bitbucket"

    # Track what has been created so the cleanup never touches an unbound name
    repo = None
    pr = None
    branch_created = False

    try:
        # Add username and password for authentication
        username = get_settings().get("BITBUCKET.USERNAME", None)
        password = get_settings().get("BITBUCKET.PASSWORD", None)
        s = requests.Session()
        s.auth = (username, password)  # Use HTTP Basic Auth
        bitbucket_client = Cloud(session=s)
        repo = bitbucket_client.workspaces.get(workspace=project_key).repositories.get(repo_slug)

        # Create a new branch from the base branch
        logger.info(f"Creating a new branch {new_branch} from {base_branch}")
        source_branch = repo.branches.get(base_branch)
        repo.branches.create(new_branch, source_branch.hash)
        branch_created = True

        # Update the file content
        url = f"https://api.bitbucket.org/2.0/repositories/{project_key}/{repo_slug}/src"
        files = {FILE_PATH: NEW_FILE_CONTENT}
        data = {
            "message": "update cli_pip.py",
            "branch": new_branch,
        }
        response = requests.request("POST", url, auth=HTTPBasicAuth(username, password), data=data, files=files)
        # Fail fast instead of waiting several minutes for a PR that has no changes
        response.raise_for_status()

        # Create a pull request
        logger.info(f"Creating a pull request from {new_branch} to {base_branch}")
        pr = repo.pullrequests.create(
            title=f'{new_branch}',
            description="update cli_pip.py",
            source_branch=new_branch,
            destination_branch=base_branch
        )

        # check every 1 minute, for 5 minutes if the PR has all the tool results
        for i in range(NUM_MINUTES):
            logger.info("Waiting for the PR to get all the tool results...")
            time.sleep(60)
            comments = list(pr.comments())
            comments_raw = [c.raw for c in comments]
            if len(comments) >= 5:  # header, 3 suggestions, 1 review
                valid_review = any(
                    comment_raw.startswith('## PR Reviewer Guide 🔍') for comment_raw in comments_raw
                )
                if valid_review:
                    break
                logger.error("REVIEW feedback is invalid")
                raise Exception("REVIEW feedback is invalid")
            else:
                logger.info(f"Waiting for the PR to get all the tool results. {i + 1} minute(s) passed")
        else:
            assert False, f"After {NUM_MINUTES} minutes, the PR did not get all the tool results"

        # cleanup - decline the PR and delete the branch; a failure here still fails the test
        pr.decline()
        pr = None
        repo.branches.delete(new_branch)
        branch_created = False

        # If we reach here, the test is successful
        logger.info("Succeeded in running e2e test for Bitbucket app on the PR")
    except Exception as e:
        logger.error(f"Failed to run e2e test for Bitbucket app: {e}")
        # Re-raise so the original error and traceback are reported
        raise
    finally:
        # Best-effort cleanup of whatever is still left; each step is attempted independently
        if pr is not None:
            try:
                pr.decline()
            except Exception as cleanup_error:
                logger.error(f"Failed to decline the PR during cleanup: {cleanup_error}")
        if repo is not None and branch_created:
            try:
                repo.branches.delete(new_branch)
            except Exception as cleanup_error:
                logger.error(f"Failed to delete branch {new_branch} during cleanup: {cleanup_error}")


# Allow running this e2e test directly as a script
if __name__ == '__main__':
    test_e2e_run_bitbucket_app()


================================================
FILE: tests/e2e_tests/test_gitea_app.py
================================================
import os
import time
from datetime import datetime

import requests

from pr_agent.config_loader import get_settings
from pr_agent.log import get_logger, setup_logger
from tests.e2e_tests.e2e_utils import (
    FILE_PATH,
    IMPROVE_START_WITH_REGEX_PATTERN,
    NEW_FILE_CONTENT,
    NUM_MINUTES,
    PR_HEADER_START_WITH,
    REVIEW_START_WITH,
)

# Logging is configured at import time; the level comes from the LOG_LEVEL environment
# variable and defaults to "INFO".
log_level = os.environ.get("LOG_LEVEL", "INFO")
setup_logger(log_level)
logger = get_logger()

def test_e2e_run_gitea_app():
    """
    End-to-end test for the Gitea app.

    Creates a branch, commits NEW_FILE_CONTENT to FILE_PATH and opens a pull request in the
    `codiumai/pr-agent-tests` repository through the Gitea REST API, then polls the PR comments
    once a minute (up to NUM_MINUTES times) until at least five comments exist, one of which must
    start with the reviewer-guide header. The PR is closed and the branch deleted afterwards.

    Raises:
        AssertionError: if GITEA.URL / GITEA.TOKEN are not configured, or the tool results do not
            appear within NUM_MINUTES minutes.
        Exception: any other failure (including HTTP errors) is logged and re-raised.
    """
    repo_name = 'pr-agent-tests'
    owner = 'codiumai'
    base_branch = "main"
    new_branch = f"gitea_app_e2e_test-{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}"
    get_settings().config.git_provider = "gitea"

    # Bound up front so the cleanup in `finally` can always inspect them
    gitea_url = None
    headers = None
    pr_number = None
    pr_closed = False
    branch_deleted = False

    try:
        gitea_url = get_settings().get("GITEA.URL", None)
        gitea_token = get_settings().get("GITEA.TOKEN", None)

        if not gitea_url:
            logger.error("GITEA.URL is not set in the configuration")
            logger.info("Please set GITEA.URL in .env file or environment variables")
            assert False, "GITEA.URL is not set in the configuration"

        if not gitea_token:
            logger.error("GITEA.TOKEN is not set in the configuration")
            logger.info("Please set GITEA.TOKEN in .env file or environment variables")
            assert False, "GITEA.TOKEN is not set in the configuration"

        headers = {
            'Authorization': f'token {gitea_token}',
            'Content-Type': 'application/json',
            'Accept': 'application/json'
        }

        logger.info(f"Creating a new branch {new_branch} from {base_branch}")

        response = requests.get(
            f"{gitea_url}/api/v1/repos/{owner}/{repo_name}/branches/{base_branch}",
            headers=headers
        )
        response.raise_for_status()
        base_branch_data = response.json()
        base_commit_sha = base_branch_data['commit']['id']

        branch_data = {
            'ref': f"refs/heads/{new_branch}",
            'sha': base_commit_sha
        }
        response = requests.post(
            f"{gitea_url}/api/v1/repos/{owner}/{repo_name}/git/refs",
            headers=headers,
            json=branch_data
        )
        response.raise_for_status()

        logger.info(f"Updating file {FILE_PATH} in branch {new_branch}")

        import base64
        file_content_encoded = base64.b64encode(NEW_FILE_CONTENT.encode()).decode()

        try:
            response = requests.get(
                f"{gitea_url}/api/v1/repos/{owner}/{repo_name}/contents/{FILE_PATH}?ref={new_branch}",
                headers=headers
            )
            response.raise_for_status()
            existing_file = response.json()
            file_sha = existing_file.get('sha')

            file_data = {
                'message': 'Update cli_pip.py',
                'content': file_content_encoded,
                'sha': file_sha,
                'branch': new_branch
            }
        except Exception:
            # The file could not be read (e.g. it does not exist yet): create it instead of updating.
            # `Exception` rather than a bare `except` so KeyboardInterrupt/SystemExit still propagate.
            file_data = {
                'message': 'Add cli_pip.py',
                'content': file_content_encoded,
                'branch': new_branch
            }

        response = requests.put(
            f"{gitea_url}/api/v1/repos/{owner}/{repo_name}/contents/{FILE_PATH}",
            headers=headers,
            json=file_data
        )
        response.raise_for_status()

        logger.info(f"Creating a pull request from {new_branch} to {base_branch}")
        pr_data = {
            'title': f'Test PR from {new_branch}',
            'body': 'update cli_pip.py',
            'head': new_branch,
            'base': base_branch
        }
        response = requests.post(
            f"{gitea_url}/api/v1/repos/{owner}/{repo_name}/pulls",
            headers=headers,
            json=pr_data
        )
        response.raise_for_status()
        pr = response.json()
        pr_number = pr['number']

        # check every 1 minute, for NUM_MINUTES minutes, if the PR has all the tool results
        for i in range(NUM_MINUTES):
            logger.info("Waiting for the PR to get all the tool results...")
            time.sleep(60)

            response = requests.get(
                f"{gitea_url}/api/v1/repos/{owner}/{repo_name}/issues/{pr_number}/comments",
                headers=headers
            )
            response.raise_for_status()
            comments = response.json()

            if len(comments) >= 5:
                valid_review = any(
                    comment['body'].startswith('## PR Reviewer Guide 🔍') for comment in comments
                )
                if valid_review:
                    break
                logger.error("REVIEW feedback is invalid")
                raise Exception("REVIEW feedback is invalid")
            else:
                logger.info(f"Waiting for the PR to get all the tool results. {i + 1} minute(s) passed")
        else:
            assert False, f"After {NUM_MINUTES} minutes, the PR did not get all the tool results"

        logger.info(f"Cleaning up: closing PR and deleting branch {new_branch}")

        close_data = {'state': 'closed'}
        response = requests.patch(
            f"{gitea_url}/api/v1/repos/{owner}/{repo_name}/pulls/{pr_number}",
            headers=headers,
            json=close_data
        )
        response.raise_for_status()
        pr_closed = True

        response = requests.delete(
            f"{gitea_url}/api/v1/repos/{owner}/{repo_name}/git/refs/heads/{new_branch}",
            headers=headers
        )
        response.raise_for_status()
        branch_deleted = True

        logger.info("Succeeded in running e2e test for Gitea app on the PR")
    except Exception as e:
        logger.error(f"Failed to run e2e test for Gitea app: {e}")
        raise
    finally:
        # Best-effort cleanup of anything not already cleaned up above.
        # There is deliberately no `return` in this block: returning from `finally` would discard
        # the exception being propagated and make a failed test look successful.
        if headers is not None and gitea_url is not None:
            try:
                if pr_number is not None and not pr_closed:
                    requests.patch(
                        f"{gitea_url}/api/v1/repos/{owner}/{repo_name}/pulls/{pr_number}",
                        headers=headers,
                        json={'state': 'closed'}
                    )

                if not branch_deleted:
                    requests.delete(
                        f"{gitea_url}/api/v1/repos/{owner}/{repo_name}/git/refs/heads/{new_branch}",
                        headers=headers
                    )
            except Exception as cleanup_error:
                logger.error(f"Failed to clean up after test: {cleanup_error}")

# Allow running this e2e test directly as a script
if __name__ == '__main__':
    test_e2e_run_gitea_app() 

================================================
FILE: tests/e2e_tests/test_github_app.py
================================================
import os
import re
import time
from datetime import datetime

from pr_agent.config_loader import get_settings
from pr_agent.git_providers import get_git_provider
from pr_agent.log import get_logger, setup_logger
from tests.e2e_tests.e2e_utils import (
    FILE_PATH,
    IMPROVE_START_WITH_REGEX_PATTERN,
    NEW_FILE_CONTENT,
    NUM_MINUTES,
    PR_HEADER_START_WITH,
    REVIEW_START_WITH,
)

# Logging is configured at import time; the level comes from the LOG_LEVEL environment
# variable and defaults to "INFO".
log_level = os.environ.get("LOG_LEVEL", "INFO")
setup_logger(log_level)
logger = get_logger()


def test_e2e_run_github_app():
    """
    End-to-end check of the GitHub app against the 'Codium-ai/pr-agent-tests' repo.

    What we want to do:
    (1) create a temporary branch from `main`, update FILE_PATH on it and open a PR
    (2) poll the PR once a minute, for up to NUM_MINUTES minutes
    (3) once the PR has exactly two issue comments, check that the PR body and the two
        comments look like the DESCRIBE, REVIEW and IMPROVE tool outputs

    The temporary branch is removed in a `finally` block, and only if it was actually
    created, so a failure during setup is not masked by a second failure during cleanup.

    Raises:
        AssertionError: if a tool output is malformed, or the outputs never show up.
        Exception: any other error is logged and re-raised unchanged.
    """
    base_branch = "main"  # or any base branch you want
    new_branch = f"github_app_e2e_test-{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}"
    repo_url = 'Codium-ai/pr-agent-tests'
    get_settings().config.git_provider = "github"
    git_provider = get_git_provider()()
    github_client = git_provider.github_client
    repo = github_client.get_repo(repo_url)
    branch_created = False  # cleanup must only touch a branch this test created

    try:
        # Create a new branch from the base branch
        source = repo.get_branch(base_branch)
        logger.info(f"Creating a new branch {new_branch} from {base_branch}")
        repo.create_git_ref(ref=f"refs/heads/{new_branch}", sha=source.commit.sha)
        branch_created = True

        # Get the file you want to edit
        file = repo.get_contents(FILE_PATH, ref=base_branch)

        # Update the file content
        logger.info(f"Updating the file {FILE_PATH}")
        commit_message = "update cli_pip.py"
        repo.update_file(
            file.path,
            commit_message,
            NEW_FILE_CONTENT,
            file.sha,
            branch=new_branch
        )

        # Create a pull request
        logger.info(f"Creating a pull request from {new_branch} to {base_branch}")
        pr = repo.create_pull(
            title=new_branch,
            body="update cli_pip.py",
            head=new_branch,
            base=base_branch
        )

        # Check once a minute, for NUM_MINUTES minutes, whether the PR has all the tool results
        for i in range(NUM_MINUTES):
            logger.info("Waiting for the PR to get all the tool results...")
            time.sleep(60)
            logger.info(f"Checking the PR {pr.html_url} after {i + 1} minute(s)")
            pr.update()
            pr_header_body = pr.body
            comments = list(pr.get_issue_comments())
            if len(comments) == 2:
                comments_body = [comment.body for comment in comments]
                assert pr_header_body.startswith(PR_HEADER_START_WITH), "DESCRIBE feedback is invalid"
                assert comments_body[0].startswith(REVIEW_START_WITH), "REVIEW feedback is invalid"
                assert re.match(IMPROVE_START_WITH_REGEX_PATTERN, comments_body[1]), "IMPROVE feedback is invalid"
                break
            else:
                logger.info(f"Waiting for the PR to get all the tool results. {i + 1} minute(s) passed")
        else:
            # Explicit raise (not `assert False`) so the timeout still fails under `python -O`
            raise AssertionError(f"After {NUM_MINUTES} minutes, the PR did not get all the tool results")

        # If we reach here, the test is successful
        logger.info(f"Succeeded in running e2e test for GitHub app on the PR {pr.html_url}")
    except Exception as e:
        logger.error(f"Failed to run e2e test for GitHub app: {e}")
        # Re-raise the original error so the test report shows the real cause
        raise
    finally:
        # cleanup - delete the branch (best effort, never hides the test outcome)
        if branch_created:
            try:
                logger.info(f"Deleting the branch {new_branch}")
                repo.get_git_ref(f"heads/{new_branch}").delete()
            except Exception as cleanup_error:
                logger.error(f"Failed to clean up after test: {cleanup_error}")


# Allow running this e2e test directly as a script, in addition to a test runner.
if __name__ == '__main__':
    test_e2e_run_github_app()


================================================
FILE: tests/e2e_tests/test_gitlab_webhook.py
================================================
import os
import re
import time
from datetime import datetime

import gitlab

from pr_agent.config_loader import get_settings
from pr_agent.git_providers import get_git_provider
from pr_agent.log import get_logger, setup_logger
from tests.e2e_tests.e2e_utils import (
    FILE_PATH,
    IMPROVE_START_WITH_REGEX_PATTERN,
    NEW_FILE_CONTENT,
    NUM_MINUTES,
    PR_HEADER_START_WITH,
    REVIEW_START_WITH,
)

# Configure logging once at import time: the level is read from the LOG_LEVEL
# environment variable and falls back to "INFO" when it is not set.
log_level = os.environ.get("LOG_LEVEL", "INFO")
setup_logger(log_level)
logger = get_logger()

def test_e2e_run_github_app():
    """
    End-to-end check of the GitLab webhook against the 'codiumai/pr-agent-tests' project.

    Note: despite its name (kept so existing invocations keep working), this test
    exercises GitLab, not GitHub.

    Steps:
    (1) create a temporary branch from `main`, update FILE_PATH on it and open a merge request
    (2) poll the merge request once a minute, for up to NUM_MINUTES minutes
    (3) once the MR has exactly two non-system notes (and the second one is no longer a
        'Work in progress' placeholder), check that the MR description and the two notes
        look like the DESCRIBE, REVIEW and IMPROVE tool outputs

    The temporary branch is removed in a `finally` block, and only if it was actually
    created, so a failure during setup is not masked by a second failure during cleanup.

    Raises:
        AssertionError: if a tool output is malformed, or the outputs never show up.
        Exception: any other error is logged and re-raised unchanged.
    """
    # GitLab setup
    GITLAB_URL = "https://gitlab.com"
    GITLAB_TOKEN = get_settings().gitlab.PERSONAL_ACCESS_TOKEN
    gl = gitlab.Gitlab(GITLAB_URL, private_token=GITLAB_TOKEN)
    repo_url = 'codiumai/pr-agent-tests'
    project = gl.projects.get(repo_url)

    base_branch = "main"  # or any base branch you want
    new_branch = f"github_app_e2e_test-{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}"
    branch_created = False  # cleanup must only touch a branch this test created

    try:
        # Create a new branch from the base branch
        logger.info(f"Creating a new branch {new_branch} from {base_branch}")
        project.branches.create({'branch': new_branch, 'ref': base_branch})
        branch_created = True

        # Get the file you want to edit
        file = project.files.get(file_path=FILE_PATH, ref=base_branch)

        # Update the file content
        logger.info(f"Updating the file {FILE_PATH}")
        commit_message = "update cli_pip.py"
        file.content = NEW_FILE_CONTENT
        file.save(branch=new_branch, commit_message=commit_message)

        # Create a merge request
        logger.info(f"Creating a merge request from {new_branch} to {base_branch}")
        mr = project.mergerequests.create({
            'source_branch': new_branch,
            'target_branch': base_branch,
            'title': new_branch,
            'description': "update cli_pip.py"
        })
        logger.info(f"Merge request created: {mr.web_url}")

        # Check once a minute, for NUM_MINUTES minutes, whether the MR has all the tool results
        for i in range(NUM_MINUTES):
            logger.info("Waiting for the MR to get all the tool results...")
            time.sleep(60)
            logger.info(f"Checking the MR {mr.web_url} after {i + 1} minute(s)")
            mr = project.mergerequests.get(mr.iid)
            mr_header_body = mr.description
            # The notes list is reversed before use, as in the original ordering logic
            comments = mr.notes.list()[::-1]
            # clean all system comments
            comments = [comment for comment in comments if comment.system is False]
            if len(comments) == 2:  # "changed the description" is received as the first comment
                comments_body = [comment.body for comment in comments]
                if 'Work in progress' in comments_body[1]:
                    # The second note is still a placeholder; check again next minute
                    continue
                assert mr_header_body.startswith(PR_HEADER_START_WITH), "DESCRIBE feedback is invalid"
                assert comments_body[0].startswith(REVIEW_START_WITH), "REVIEW feedback is invalid"
                assert re.match(IMPROVE_START_WITH_REGEX_PATTERN, comments_body[1]), "IMPROVE feedback is invalid"
                break
            else:
                logger.info(f"Waiting for the MR to get all the tool results. {i + 1} minute(s) passed")
        else:
            # Explicit raise (not `assert False`) so the timeout still fails under `python -O`
            raise AssertionError(f"After {NUM_MINUTES} minutes, the MR did not get all the tool results")

        # If we reach here, the test is successful
        logger.info(f"Succeeded in running e2e test for GitLab app on the MR {mr.web_url}")
    except Exception as e:
        logger.error(f"Failed to run e2e test for GitLab app: {e}")
        # Re-raise the original error so the test report shows the real cause
        raise
    finally:
        # cleanup - delete the branch (best effort, never hides the test outcome)
        if branch_created:
            try:
                logger.info(f"Deleting the branch {new_branch}")
                project.branches.delete(new_branch)
            except Exception as cleanup_error:
                logger.error(f"Failed to clean up after test: {cleanup_error}")


# Allow running this e2e test directly as a script, in addition to a test runner.
if __name__ == '__main__':
    test_e2e_run_github_app()


================================================
FILE: tests/health_test/main.py
================================================
import argparse
import asyncio
import copy
import os
from pathlib import Path

from starlette_context import context, request_cycle_context

from pr_agent.agent.pr_agent import PRAgent, commands
from pr_agent.cli import run_command
from pr_agent.config_loader import get_settings, global_settings
from pr_agent.log import get_logger, setup_logger
from tests.e2e_tests import e2e_utils

# Configure logging once at import time: the level is read from the LOG_LEVEL
# environment variable and falls back to "INFO" when it is not set.
log_level = os.environ.get("LOG_LEVEL", "INFO")
setup_logger(log_level)


async def run_async():
    """
    Run the 'describe', 'review' and 'improve' commands against a PR and sanity-check
    the artifact each one leaves in the settings.

    The PR URL comes from the TEST_PR_URL environment variable, with a hard-coded
    default. Output publishing is switched off and fallback models are cleared before
    the commands run. After each successful check the settings stored in the request
    context are replaced by a copy of the snapshot taken just before that command.

    Raises:
        Exception: whatever a command or a sanity assertion raised, after it is logged.
    """
    pr_url = os.getenv('TEST_PR_URL', 'https://github.com/Codium-ai/pr-agent/pull/1385')

    overrides = (
        ("config.git_provider", "github"),
        ("config.publish_output", False),
        ("config.fallback_models", []),
    )
    for setting_key, setting_value in overrides:
        get_settings().set(setting_key, setting_value)

    # One row per tool: command, required artifact prefix, substrings the artifact
    # must contain, and the message logged when the check passes.
    sanity_checks = (
        ('describe', '###', ('PR Type', 'Description'), "PR description generated successfully\n"),
        ('review', '##', ('PR Reviewer Guide',), "PR review generated successfully\n"),
        ('improve', '##', ('PR Code Suggestions',), "PR improvements generated successfully\n"),
    )

    agent = PRAgent()
    try:
        for command, heading_prefix, required_markers, success_message in sanity_checks:
            get_logger().info(f"\nSanity check for the '{command}' command...")
            settings_snapshot = copy.deepcopy(get_settings())
            await agent.handle_request(pr_url, [command])
            artifact = dict(get_settings().data)['artifact']
            assert artifact.startswith(heading_prefix) and all(marker in artifact for marker in required_markers)
            # Restore settings state after each test to prevent test interference
            context['settings'] = copy.deepcopy(settings_snapshot)
            get_logger().info(success_message)

        get_logger().info("\n\n========\nHealth test passed successfully\n========")

    except Exception as e:
        get_logger().exception("\n\n========\nHealth test failed\n========")
        raise e


def run():
    """
    Synchronous entry point for the health test.

    Opens a request-cycle context, seeds it with a deep copy of the global settings
    and then drives `run_async()` to completion on a new event loop.
    """
    with request_cycle_context({}):
        isolated_settings = copy.deepcopy(global_settings)
        context['settings'] = isolated_settings
        asyncio.run(run_async())


# Allow running the health test directly as a script.
if __name__ == '__main__':
    run()


================================================
FILE: tests/unittest/test_add_docs_trigger.py
================================================
import pytest

from pr_agent.agent.pr_agent import PRAgent
from pr_agent.config_loader import get_settings
from pr_agent.identity_providers import get_identity_provider
from pr_agent.identity_providers.identity_provider import Eligibility
from pr_agent.servers.github_app import handle_new_pr_opened
from pr_agent.tools.pr_add_docs import PRAddDocs


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "action,draft,state,should_run",
    [
        ("opened", False, "open", True),
        ("edited", False, "open", False),
        ("opened", True, "open", False),
        ("opened", False, "closed", False),
    ],
)
async def test_add_docs_trigger(monkeypatch, action, draft, state, should_run):
    """
    Check when the "/add_docs" auto-command is triggered by `handle_new_pr_opened`.

    The GitHub app settings are set to run "/add_docs" on the "opened" action, the git
    provider and identity provider are replaced by fakes, and `PRAddDocs.run` is
    replaced by a spy. The parametrized cases expect the spy to be called only for an
    "opened" action on a non-draft PR whose state is "open".
    """
    # Enable the "/add_docs" auto-command on PR opened
    app_settings = get_settings()
    app_settings.github_app.pr_commands = ["/add_docs"]
    app_settings.github_app.handle_pr_actions = ["opened"]

    class FakeGitProvider:
        """Minimal git provider stand-in used by both apply_repo_settings and PRAddDocs."""

        def __init__(self, pr_url, *args, **kwargs):
            self.pr = type("pr", (), {"title": "Test PR"})()
            self.diff_files = []

        def get_pr_branch(self):
            return "test-branch"

        def get_pr_description(self):
            return "desc"

        def get_languages(self):
            return ["Python"]

        def get_files(self):
            return []

        def get_commit_messages(self):
            return "msg"

        def publish_comment(self, *args, **kwargs):
            return None

        def remove_initial_comment(self):
            return None

        def publish_code_suggestions(self, suggestions):
            return True

        def get_repo_settings(self):
            return {}

    # Route every git-provider lookup to the fake
    monkeypatch.setattr(
        "pr_agent.git_providers.utils.get_git_provider_with_context",
        lambda pr_url: FakeGitProvider(pr_url),
    )
    monkeypatch.setattr(
        "pr_agent.tools.pr_add_docs.get_git_provider",
        lambda: FakeGitProvider,
    )

    # Make the identity provider report every sender as eligible
    monkeypatch.setattr(
        get_identity_provider().__class__,
        "verify_eligibility",
        lambda *args, **kwargs: Eligibility.ELIGIBLE,
    )

    # Spy on PRAddDocs.run(): every invocation is recorded in this list
    run_calls = []

    async def fake_run(self):
        run_calls.append(True)

    monkeypatch.setattr(PRAddDocs, "run", fake_run)

    # Minimal webhook payload for the PR under test
    pull_request = {
        "url": "https://example.com/fake/pr",
        "state": state,
        "draft": draft,
    }
    body = {"action": action, "pull_request": pull_request}

    # Invoke the PR-open handler
    await handle_new_pr_opened(
        body=body,
        event="pull_request",
        sender="tester",
        sender_id="123",
        action=action,
        log_context={},
        agent=PRAgent(),
    )

    was_run = bool(run_calls)
    assert was_run is should_run, (
        f"Expected run() to be {'called' if should_run else 'skipped'}"
        f" for action={action!r}, draft={draft}, state={state!r}"
    )


================================================
FILE: tests/unittest/test_aws_secrets_manager_provider.py
================================================
import json
from unittest.mock import MagicMock, patch

import pytest
from botocore.exceptions import ClientError

from pr_agent.secret_providers.aws_secrets_manager_provider import AWSSecretsManagerProvider


class TestAWSSecretsManagerProvider:
    """Unit tests for AWSSecretsManagerProvider, run against a mocked boto3 client."""

    def _provider(self):
        """Build a provider with mocked settings and a mocked boto3 client.

        Returns:
            tuple: (provider, mock_client), where mock_client is the MagicMock that
            was assigned to `provider.client`.
        """
        secret_arn = 'arn:aws:secretsmanager:us-east-1:123456789012:secret:test-secret'
        config_values = {
            'aws_secrets_manager.secret_arn': secret_arn,
            'aws_secrets_manager.region_name': 'us-east-1',
            'aws.AWS_REGION_NAME': 'us-east-1'
        }

        with patch('pr_agent.secret_providers.aws_secrets_manager_provider.get_settings') as mock_get_settings:
            with patch('pr_agent.secret_providers.aws_secrets_manager_provider.boto3.client') as mock_boto3_client:
                fake_settings = MagicMock()
                fake_settings.get.side_effect = lambda k, d=None: config_values.get(k, d)
                fake_settings.aws_secrets_manager.secret_arn = secret_arn
                mock_get_settings.return_value = fake_settings

                # Mock boto3 client
                mock_client = MagicMock()
                mock_boto3_client.return_value = mock_client

                provider = AWSSecretsManagerProvider()
                # Set client directly for testing
                provider.client = mock_client
                return provider, mock_client

    # Positive test cases
    def test_get_secret_success(self):
        """get_secret returns the SecretString and queries the client by SecretId."""
        provider, mock_client = self._provider()
        mock_client.get_secret_value.return_value = {'SecretString': 'test-secret-value'}

        fetched = provider.get_secret('test-secret-name')

        assert fetched == 'test-secret-value'
        mock_client.get_secret_value.assert_called_once_with(SecretId='test-secret-name')

    def test_get_all_secrets_success(self):
        """get_all_secrets returns the dict decoded from the JSON SecretString."""
        provider, mock_client = self._provider()
        secret_data = {'openai.key': 'sk-test', 'github.webhook_secret': 'webhook-secret'}
        mock_client.get_secret_value.return_value = {'SecretString': json.dumps(secret_data)}

        fetched = provider.get_all_secrets()

        assert fetched == secret_data

    # Negative test cases (following Google Cloud Storage pattern)
    def test_get_secret_failure(self):
        """A client error makes get_secret return an empty string."""
        provider, mock_client = self._provider()
        mock_client.get_secret_value.side_effect = Exception("AWS error")

        fetched = provider.get_secret('nonexistent-secret')

        assert fetched == ""  # Confirm empty string is returned

    def test_get_all_secrets_failure(self):
        """A client error makes get_all_secrets return an empty dictionary."""
        provider, mock_client = self._provider()
        mock_client.get_secret_value.side_effect = Exception("AWS error")

        fetched = provider.get_all_secrets()

        assert fetched == {}  # Confirm empty dictionary is returned

    def test_store_secret_update_existing(self):
        """store_secret forwards the name and value to put_secret_value."""
        provider, mock_client = self._provider()
        # NOTE(review): this stubs update_secret although the assertion below targets
        # put_secret_value; presumably a leftover, confirm before removing.
        mock_client.update_secret.return_value = {}

        provider.store_secret('test-secret', 'test-value')

        expected_call = {'SecretId': 'test-secret', 'SecretString': 'test-value'}
        mock_client.put_secret_value.assert_called_once_with(**expected_call)

    def test_init_failure_invalid_config(self):
        """Constructing the provider with secret_arn set to None raises."""
        with patch('pr_agent.secret_providers.aws_secrets_manager_provider.get_settings') as mock_get_settings:
            broken_settings = MagicMock()
            broken_settings.aws_secrets_manager.secret_arn = None  # Configuration error
            mock_get_settings.return_value = broken_settings

            with pytest.raises(Exception):
                AWSSecretsManagerProvider()

    def test_store_secret_failure(self):
        """An error from put_secret_value propagates out of store_secret."""
        provider, mock_client = self._provider()
        mock_client.put_secret_value.side_effect = Exception("AWS error")

        with pytest.raises(Exception):
            provider.store_secret('test-secret', 'test-value')


================================================
FILE: tests/unittest/test_azure_devops_comment.py
================================================
import unittest
from unittest.mock import MagicMock, patch

from pr_agent.config_loader import get_settings
from pr_agent.git_providers import AzureDevopsProvider


class TestAzureDevopsProviderPublishComment(unittest.TestCase):
    """Checks which thread status AzureDevopsProvider.publish_comment passes to CommentThread."""

    def _published_thread_status(self, mock_get_settings, configured_status):
        """Publish a comment under mocked settings and return the `status` given to CommentThread.

        Args:
            mock_get_settings: the patched `get_settings` of the provider module.
            configured_status: value returned by the mocked `azure_devops.get(...)`.
        """
        fake_settings = MagicMock()
        fake_settings.azure_devops.get.return_value = configured_status
        fake_settings.config.publish_output_progress = True
        mock_get_settings.return_value = fake_settings

        with patch.object(AzureDevopsProvider, "_get_azure_devops_client", return_value=(MagicMock(), MagicMock())):
            provider = AzureDevopsProvider()
            provider.workspace_slug = "ws"
            provider.repo_slug = "repo"
            provider.pr_num = 1

            # Patch CommentThread and create_thread
            with patch("pr_agent.git_providers.azuredevops_provider.CommentThread") as MockThread:
                created_thread = provider.azure_devops_client.create_thread.return_value
                created_thread.comments = [MagicMock()]
                created_thread.comments[0].thread_id = 123
                created_thread.id = 123

                provider.publish_comment("test comment")
                _, call_kwargs = MockThread.call_args
                return call_kwargs.get("status")

    @patch("pr_agent.git_providers.azuredevops_provider.get_settings")
    def test_publish_comment_default_closed(self, mock_get_settings):
        """Settings lookup yields "closed": the thread is created with status "closed"."""
        status = self._published_thread_status(mock_get_settings, "closed")
        self.assertEqual(status, "closed")

    @patch("pr_agent.git_providers.azuredevops_provider.get_settings")
    def test_publish_comment_active(self, mock_get_settings):
        """Settings lookup yields "active": the thread is created with status "active"."""
        status = self._published_thread_status(mock_get_settings, "active")
        self.assertEqual(status, "active")

    def test_default_comment_status_from_config_file(self):
        """The unmocked settings expose azure_devops.default_comment_status as "closed"."""
        # Read through the real get_settings (not patched in this test); the expected
        # value should match what's in your configuration.toml
        configured_default = get_settings().azure_devops.default_comment_status
        self.assertEqual(configured_default, "closed")

================================================
FILE: tests/unittest/test_azure_devops_parsing.py
================================================
from pr_agent.git_providers import AzureDevopsProvider


class TestAzureDevOpsParsing:
    """AzureDevopsProvider._parse_pr_url must yield the same parts for each supported URL layout."""

    # (workspace_slug, repo_slug, pr_number) expected from every URL below
    EXPECTED_PARTS = ("project", "repo", 1)

    def test_regular_address(self):
        """URL on dev.azure.com."""
        parsed = AzureDevopsProvider._parse_pr_url(
            "https://dev.azure.com/organization/project/_git/repo/pullrequest/1"
        )
        assert parsed == self.EXPECTED_PARTS

    def test_visualstudio_address(self):
        """URL on an <organization>.visualstudio.com host."""
        parsed = AzureDevopsProvider._parse_pr_url(
            "https://organization.visualstudio.com/project/_git/repo/pullrequest/1"
        )
        assert parsed == self.EXPECTED_PARTS

    def test_self_hosted_address(self):
        """URL on a self-hosted server with a port and extra leading path segments."""
        parsed = AzureDevopsProvider._parse_pr_url(
            "http://server.be:8080/tfs/department/project/_git/repo/pullrequest/1"
        )
        assert parsed == self.EXPECTED_PARTS


================================================
FILE: tests/unittest/test_bitbucket_provider.py
================================================
from unittest.mock import MagicMock

from atlassian.bitbucket import Bitbucket

from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo
from pr_agent.git_providers import BitbucketServerProvider
from pr_agent.git_providers.bitbucket_provider import BitbucketProvider


class TestBitbucketProvider:
    """URL parsing test for the Bitbucket (bitbucket.org) provider."""

    def test_parse_pr_url(self):
        """A bitbucket.org pull-request URL splits into workspace, repo and PR number."""
        workspace, repo, number = BitbucketProvider._parse_pr_url(
            "https://bitbucket.org/WORKSPACE_XYZ/MY_TEST_REPO/pull-requests/321"
        )
        assert (workspace, repo, number) == ("WORKSPACE_XYZ", "MY_TEST_REPO", 321)


class TestBitbucketServerProvider:
    """Tests for BitbucketServerProvider: PR URL parsing and get_diff_files against a mocked Bitbucket client."""

    def test_parse_pr_url(self):
        """A /projects/<key>/repos/<slug>/pull-requests/<n> URL yields (key, slug, n)."""
        url = "https://git.onpreminstance.com/projects/AAA/repos/my-repo/pull-requests/1"
        workspace_slug, repo_slug, pr_number = BitbucketServerProvider._parse_pr_url(url)
        assert workspace_slug == "AAA"
        assert repo_slug == "my-repo"
        assert pr_number == 1

    def test_parse_pr_url_with_users(self):
        """A /users/<name>/... URL yields the workspace slug prefixed with '~'."""
        url = "https://bitbucket.company-server.url/users/username/repos/my-repo/pull-requests/1"
        workspace_slug, repo_slug, pr_number = BitbucketServerProvider._parse_pr_url(url)
        assert workspace_slug == "~username"
        assert repo_slug == "my-repo"
        assert pr_number == 1

    def mock_get_content_of_file(self, project_key, repository_slug, filename, at=None, markup=None):
        """Side effect for the mocked client's get_content_of_file.

        Returns canned file content selected only by the `at` commit id; the other
        arguments are ignored. Unknown commit ids yield an empty string.
        """
        content_map = {
            '9c1cffdd9f276074bfb6fb3b70fbee62d298b058': 'file\nwith\nsome\nlines\nto\nemulate\na\nreal\nfile\n',
            '2a1165446bdf991caf114d01f7c88d84ae7399cf': 'file\nwith\nmultiple \nlines\nto\nemulate\na\nfake\nfile\n',
            'f617708826cdd0b40abb5245eda71630192a17e3': 'file\nwith\nmultiple \nlines\nto\nemulate\na\nreal\nfile\n',
            'cb68a3027d6dda065a7692ebf2c90bed1bcdec28': 'file\nwith\nsome\nchanges\nto\nemulate\na\nreal\nfile\n',
            '1905dcf16c0aac6ac24f7ab617ad09c73dc1d23b': 'file\nwith\nsome\nlines\nto\nemulate\na\nfake\ntest\n',
            'ae4eca7f222c96d396927d48ab7538e2ee13ca63': 'readme\nwithout\nsome\nlines\nto\nsimulate\na\nreal\nfile',
            '548f8ba15abc30875a082156314426806c3f4d97': 'file\nwith\nsome\nlines\nto\nemulate\na\nreal\nfile',
            '0e898cb355a5170d8c8771b25d43fcaa1d2d9489': 'file\nwith\nmultiple\nlines\nto\nemulate\na\nreal\nfile'
        }
        return content_map.get(at, '')

    def mock_get_from_bitbucket_60(self, url):
        """Side effect for the mocked client's get: reports server version "6.0"; any other URL yields ''."""
        response_map = {
            "rest/api/1.0/application-properties": {
                "version": "6.0"
            }
        }
        return response_map.get(url, '')

    def mock_get_from_bitbucket_70(self, url):
        """Side effect for the mocked client's get: reports server version "7.0"; any other URL yields ''."""
        response_map = {
            "rest/api/1.0/application-properties": {
                "version": "7.0"
            }
        }
        return response_map.get(url, '')

    def mock_get_from_bitbucket_816(self, url):
        """Side effect for the mocked client's get: reports server version "8.16" and also
        answers the merge-base URL of PR 1 in AAA/my-repo; any other URL yields ''."""
        response_map = {
            "rest/api/1.0/application-properties": {
                "version": "8.16"
            },
            "rest/api/latest/projects/AAA/repos/my-repo/pull-requests/1/merge-base": {
                'id': '548f8ba15abc30875a082156314426806c3f4d97'
            }
        }
        return response_map.get(url, '')


    '''
    tests the 2-way diff functionality where the diff should be between the HEAD of branch b and node c
    NOT between the HEAD of main and the HEAD of branch b

          - o  branch b
         /
    o - o - o  main
        ^ node c
    '''
    def test_get_diff_files_simple_diverge_70(self):
        """Simple divergence, server version 7.0: the expected base content is the one
        mapped to the parent of the PR's single commit, not to the target branch head."""
        bitbucket_client = MagicMock(Bitbucket)
        bitbucket_client.get_pull_request.return_value = {
            'toRef': {'latestCommit': '9c1cffdd9f276074bfb6fb3b70fbee62d298b058'},
            'fromRef': {'latestCommit': '2a1165446bdf991caf114d01f7c88d84ae7399cf'}
        }
        bitbucket_client.get_pull_requests_commits.return_value = [
            {'id': '2a1165446bdf991caf114d01f7c88d84ae7399cf',
             'parents': [{'id': 'f617708826cdd0b40abb5245eda71630192a17e3'}]}
        ]
        bitbucket_client.get_commits.return_value = [
            {'id': '9c1cffdd9f276074bfb6fb3b70fbee62d298b058'},
            {'id': 'dbca09554567d2e4bee7f07993390153280ee450'}
        ]
        bitbucket_client.get_pull_requests_changes.return_value = [
            {
                'path': {'toString': 'Readme.md'},
                'type': 'MODIFY',
            }
        ]

        bitbucket_client.get.side_effect = self.mock_get_from_bitbucket_70
        bitbucket_client.get_content_of_file.side_effect = self.mock_get_content_of_file

        provider = BitbucketServerProvider(
            "https://git.onpreminstance.com/projects/AAA/repos/my-repo/pull-requests/1",
            bitbucket_client=bitbucket_client
        )

        # Positional FilePatchInfo arguments here: base content, head content, patch, filename
        expected = [
            FilePatchInfo(
                'file\nwith\nmultiple \nlines\nto\nemulate\na\nreal\nfile\n',
                'file\nwith\nmultiple \nlines\nto\nemulate\na\nfake\nfile\n',
                '--- \n+++ \n@@ -5,5 +5,5 @@\n to\n emulate\n a\n-real\n+fake\n file\n',
                'Readme.md',
                edit_type=EDIT_TYPE.MODIFIED,
            )
        ]

        actual = provider.get_diff_files()

        assert actual == expected


    '''
    tests the 2-way diff functionality where the diff should be between the HEAD of branch b and node c
    NOT between the HEAD of main and the HEAD of branch b

          - o - o - o  branch b
         /     /
    o - o -- o - o     main
             ^ node c
    '''
    def test_get_diff_files_diverge_with_merge_commit_70(self):
        """Divergence with a merge commit from main inside the PR, server version 7.0:
        the expected base content is the one mapped to commit 9c1cffdd... (node c)."""
        bitbucket_client = MagicMock(Bitbucket)
        bitbucket_client.get_pull_request.return_value = {
            'toRef': {'latestCommit': 'cb68a3027d6dda065a7692ebf2c90bed1bcdec28'},
            'fromRef': {'latestCommit': '1905dcf16c0aac6ac24f7ab617ad09c73dc1d23b'}
        }
        bitbucket_client.get_pull_requests_commits.return_value = [
            {'id': '1905dcf16c0aac6ac24f7ab617ad09c73dc1d23b',
             'parents': [{'id': '692772f456c3db77a90b11ce39ea516f8c2bad93'}]},
            {'id': '692772f456c3db77a90b11ce39ea516f8c2bad93', 'parents': [
                {'id': '2a1165446bdf991caf114d01f7c88d84ae7399cf'},
                {'id': '9c1cffdd9f276074bfb6fb3b70fbee62d298b058'},
            ]},
            {'id': '2a1165446bdf991caf114d01f7c88d84ae7399cf',
             'parents': [{'id': 'f617708826cdd0b40abb5245eda71630192a17e3'}]}
        ]
        bitbucket_client.get_commits.return_value = [
            {'id': 'cb68a3027d6dda065a7692ebf2c90bed1bcdec28'},
            {'id': '9c1cffdd9f276074bfb6fb3b70fbee62d298b058'},
            {'id': 'dbca09554567d2e4bee7f07993390153280ee450'}
        ]
        bitbucket_client.get_pull_requests_changes.return_value = [
            {
                'path': {'toString': 'Readme.md'},
                'type': 'MODIFY',
            }
        ]

        bitbucket_client.get.side_effect = self.mock_get_from_bitbucket_70
        bitbucket_client.get_content_of_file.side_effect = self.mock_get_content_of_file

        provider = BitbucketServerProvider(
            "https://git.onpreminstance.com/projects/AAA/repos/my-repo/pull-requests/1",
            bitbucket_client=bitbucket_client
        )

        expected = [
            FilePatchInfo(
                'file\nwith\nsome\nlines\nto\nemulate\na\nreal\nfile\n',
                'file\nwith\nsome\nlines\nto\nemulate\na\nfake\ntest\n',
                '--- \n+++ \n@@ -5,5 +5,5 @@\n to\n emulate\n a\n-real\n-file\n+fake\n+test\n',
                'Readme.md',
                edit_type=EDIT_TYPE.MODIFIED,
            )
        ]

        actual = provider.get_diff_files()

        assert actual == expected


    '''
    tests the 2-way diff functionality where the diff should be between the HEAD of branch c and node d
    NOT between the HEAD of main and the HEAD of branch c

            ---- o - o branch c
           /    /
          ---- o       branch b
         /    /
        o - o - o      main
            ^ node d
    '''
    def get_multi_merge_diverge_mock_client(self, api_version):
        """Build the mocked Bitbucket client shared by the multi-merge tests.

        `api_version` selects which mock_get_from_bitbucket_* method backs the client's
        `get`: 60, 70 or 816. Any other value leaves `get` without a side effect.
        Returns the configured MagicMock.
        """
        bitbucket_client = MagicMock(Bitbucket)
        bitbucket_client.get_pull_request.return_value = {
            'toRef': {'latestCommit': '9569922b22fe4fd0968be6a50ed99f71efcd0504'},
            'fromRef': {'latestCommit': 'ae4eca7f222c96d396927d48ab7538e2ee13ca63'}
        }
        bitbucket_client.get_pull_requests_commits.return_value = [
            {'id': 'ae4eca7f222c96d396927d48ab7538e2ee13ca63',
             'parents': [{'id': 'bbf300fb3af5129af8c44659f8cc7a526a6a6f31'}]},
            {'id': 'bbf300fb3af5129af8c44659f8cc7a526a6a6f31', 'parents': [
                {'id': '10b7b8e41cb370b48ceda8da4e7e6ad033182213'},
                {'id': 'd1bb183c706a3ebe4c2b1158c25878201a27ad8c'},
            ]},
            {'id': 'd1bb183c706a3ebe4c2b1158c25878201a27ad8c', 'parents': [
                {'id': '5bd76251866cb415fc5ff232f63a581e89223bda'},
                {'id': '548f8ba15abc30875a082156314426806c3f4d97'}
            ]},
            {'id': '5bd76251866cb415fc5ff232f63a581e89223bda',
             'parents': [{'id': '0e898cb355a5170d8c8771b25d43fcaa1d2d9489'}]},
            {'id': '10b7b8e41cb370b48ceda8da4e7e6ad033182213',
             'parents': [{'id': '0e898cb355a5170d8c8771b25d43fcaa1d2d9489'}]}
        ]
        bitbucket_client.get_commits.return_value = [
            {'id': '9569922b22fe4fd0968be6a50ed99f71efcd0504'},
            {'id': '548f8ba15abc30875a082156314426806c3f4d97'}
        ]
        bitbucket_client.get_pull_requests_changes.return_value = [
            {
                'path': {'toString': 'Readme.md'},
                'type': 'MODIFY',
            }
        ]

        bitbucket_client.get_content_of_file.side_effect = self.mock_get_content_of_file
        if api_version == 60:
            bitbucket_client.get.side_effect = self.mock_get_from_bitbucket_60
        elif api_version == 70:
            bitbucket_client.get.side_effect = self.mock_get_from_bitbucket_70
        elif api_version == 816:
            bitbucket_client.get.side_effect = self.mock_get_from_bitbucket_816

        return bitbucket_client

    def test_get_diff_files_multi_merge_diverge_60(self):
        """Multi-merge scenario with server version 6.0: the expected base content is
        the one mapped to commit 0e898cb3... (it differs from the 7.0 / 8.16 tests)."""
        bitbucket_client = self.get_multi_merge_diverge_mock_client(60)

        provider = BitbucketServerProvider(
            "https://git.onpreminstance.com/projects/AAA/repos/my-repo/pull-requests/1",
            bitbucket_client=bitbucket_client
        )

        expected = [
            FilePatchInfo(
                'file\nwith\nmultiple\nlines\nto\nemulate\na\nreal\nfile',
                'readme\nwithout\nsome\nlines\nto\nsimulate\na\nreal\nfile',
                '--- \n+++ \n@@ -1,9 +1,9 @@\n-file\n-with\n-multiple\n+readme\n+without\n+some\n lines\n to\n-emulate\n+simulate\n a\n real\n file\n',
                'Readme.md',
                edit_type=EDIT_TYPE.MODIFIED,
            )
        ]

        actual = provider.get_diff_files()

        assert actual == expected

    def test_get_diff_files_multi_merge_diverge_70(self):
        """Multi-merge scenario with server version 7.0: the expected base content is
        the one mapped to commit 548f8ba1... (node d)."""
        bitbucket_client = self.get_multi_merge_diverge_mock_client(70)

        provider = BitbucketServerProvider(
            "https://git.onpreminstance.com/projects/AAA/repos/my-repo/pull-requests/1",
            bitbucket_client=bitbucket_client
        )

        expected = [
            FilePatchInfo(
                'file\nwith\nsome\nlines\nto\nemulate\na\nreal\nfile',
                'readme\nwithout\nsome\nlines\nto\nsimulate\na\nreal\nfile',
                '--- \n+++ \n@@ -1,9 +1,9 @@\n-file\n-with\n+readme\n+without\n some\n lines\n to\n-emulate\n+simulate\n a\n real\n file\n',
                'Readme.md',
                edit_type=EDIT_TYPE.MODIFIED,
            )
        ]

        actual = provider.get_diff_files()

        assert actual == expected

    def test_get_diff_files_multi_merge_diverge_816(self):
        """Multi-merge scenario with server version 8.16, where the mocked `get` also
        answers the merge-base URL with commit 548f8ba1...; same expectation as 7.0."""
        bitbucket_client = self.get_multi_merge_diverge_mock_client(816)

        provider = BitbucketServerProvider(
            "https://git.onpreminstance.com/projects/AAA/repos/my-repo/pull-requests/1",
            bitbucket_client=bitbucket_client
        )

        expected = [
            FilePatchInfo(
                'file\nwith\nsome\nlines\nto\nemulate\na\nreal\nfile',
                'readme\nwithout\nsome\nlines\nto\nsimulate\na\nreal\nfile',
                '--- \n+++ \n@@ -1,9 +1,9 @@\n-file\n-with\n+readme\n+without\n some\n lines\n to\n-emulate\n+simulate\n a\n real\n file\n',
                'Readme.md',
                edit_type=EDIT_TYPE.MODIFIED,
            )
        ]

        actual = provider.get_diff_files()

        assert actual == expected


================================================
FILE: tests/unittest/test_clip_tokens.py
================================================
from unittest.mock import MagicMock, patch

import pytest

from pr_agent.algo.token_handler import TokenEncoder
from pr_agent.algo.utils import clip_tokens


class TestClipTokens:
    """Comprehensive test suite for the clip_tokens function.

    Most tests replace ``TokenEncoder.get_token_encoder`` with a mock whose ``encode``
    returns a list of a chosen length, so the token count reported for the text is
    controlled by the test rather than by a real tokenizer.
    """

    # Suffix the tests expect at the end of clipped text when add_three_dots is not disabled.
    TRUNCATED_SUFFIX = "\n...(truncated)"

    @staticmethod
    def _patch_encoder(num_tokens):
        """Return a patcher making the token encoder report ``num_tokens`` tokens for any text.

        The returned object is meant to be used as a context manager (``with``).
        """
        tokenizer = MagicMock()
        tokenizer.encode.return_value = [1] * num_tokens
        return patch.object(TokenEncoder, 'get_token_encoder', return_value=tokenizer)

    def test_empty_input_text(self):
        """Test that empty input (empty string or None) is returned as-is."""
        assert clip_tokens("", 10) == ""
        assert clip_tokens(None, 10) is None

    def test_text_under_token_limit(self):
        """Test that text under the token limit is returned unchanged."""
        text = "Short text"
        assert clip_tokens(text, 100) == text

    def test_text_exactly_at_token_limit(self):
        """Test text that is exactly at the token limit."""
        text = "This is exactly at the limit"
        with self._patch_encoder(10):  # Exactly 10 tokens
            assert clip_tokens(text, 10) == text

    def test_text_over_token_limit_with_three_dots(self):
        """Test text over token limit with three dots addition."""
        text = "This is a longer text that should be clipped when it exceeds the token limit"
        with self._patch_encoder(20):
            result = clip_tokens(text, 5)

        assert result.endswith(self.TRUNCATED_SUFFIX)
        assert len(result) < len(text)

    def test_text_over_token_limit_without_three_dots(self):
        """Test text over token limit without three dots addition."""
        text = "This is a longer text that should be clipped"
        with self._patch_encoder(20):
            result = clip_tokens(text, 5, add_three_dots=False)

        assert not result.endswith(self.TRUNCATED_SUFFIX)
        assert len(result) < len(text)

    def test_negative_max_tokens(self):
        """Test that negative max_tokens returns empty string."""
        text = "Some text"
        for max_tokens in (-1, -100):
            assert clip_tokens(text, max_tokens) == ""

    def test_zero_max_tokens(self):
        """Test that zero max_tokens returns empty string."""
        assert clip_tokens("Some text", 0) == ""

    def test_delete_last_line_functionality(self):
        """Test the delete_last_line parameter functionality."""
        text = "Line 1\nLine 2\nLine 3\nLine 4"
        max_tokens = 5

        with self._patch_encoder(20):
            result_normal = clip_tokens(text, max_tokens, delete_last_line=False)
            result_deleted = clip_tokens(text, max_tokens, delete_last_line=True)

        # The result with delete_last_line should be shorter or equal
        assert len(result_deleted) <= len(result_normal)

    def test_pre_computed_num_input_tokens(self):
        """Test using pre-computed num_input_tokens parameter."""
        text = "This is a test text"

        # Should not call the encoder when num_input_tokens is provided
        with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:
            mock_encoder.return_value = None  # Should not be called

            result = clip_tokens(text, 10, num_input_tokens=15)
            assert result.endswith(self.TRUNCATED_SUFFIX)
            mock_encoder.assert_not_called()

    def test_pre_computed_tokens_under_limit(self):
        """Test pre-computed tokens under the limit."""
        text = "Short text"

        with patch.object(TokenEncoder, 'get_token_encoder') as mock_encoder:
            mock_encoder.return_value = None  # Should not be called

            result = clip_tokens(text, 20, num_input_tokens=5)
            assert result == text
            mock_encoder.assert_not_called()

    def test_special_characters_and_unicode(self):
        """Test text with special characters and Unicode content."""
        text = "Special chars: @#$%^&*()_+ áéíóú 中문 🚀 emoji"
        with self._patch_encoder(20):
            result = clip_tokens(text, 5)

        assert isinstance(result, str)
        assert len(result) < len(text)

    def test_multiline_text_handling(self):
        """Test handling of multiline text."""
        text = "Line 1\nLine 2\nLine 3\nLine 4\nLine 5"
        with self._patch_encoder(20):
            result = clip_tokens(text, 5)

        assert isinstance(result, str)

    def test_very_long_text(self):
        """Test with very long text."""
        text = "A" * 10000  # Very long text
        with self._patch_encoder(5000):  # Many tokens
            result = clip_tokens(text, 10)

        assert len(result) < len(text)
        assert result.endswith(self.TRUNCATED_SUFFIX)

    def test_encoder_exception_handling(self):
        """Test handling of encoder exceptions."""
        text = "Test text"

        with patch.object(TokenEncoder, 'get_token_encoder', side_effect=Exception("Encoder error")):
            # Should return original text when encoder fails
            assert clip_tokens(text, 10) == text

    def test_zero_division_scenario(self):
        """Test scenario that could lead to division by zero."""
        text = "Test"

        # Empty token list (could cause division by zero)
        with self._patch_encoder(0):
            # Should handle gracefully and return original text
            assert clip_tokens(text, 10) == text

    def test_various_edge_cases(self):
        """Test various edge cases."""
        # Single character
        assert clip_tokens("A", 1000) == "A"

        # Only whitespace
        with self._patch_encoder(10):
            assert isinstance(clip_tokens("   \n  \t  ", 5), str)

        # Text with only newlines
        with self._patch_encoder(10):
            assert isinstance(clip_tokens("\n\n\n\n", 2, delete_last_line=True), str)

    def test_parameter_combinations(self):
        """Test different parameter combinations."""
        text = "Multi\nline\ntext\nfor\ntesting"
        max_tokens = 5

        with self._patch_encoder(20):
            # Test all four (add_three_dots, delete_last_line) combinations
            for add_dots in (True, False):
                for delete_line in (True, False):
                    result = clip_tokens(text, max_tokens,
                                         add_three_dots=add_dots,
                                         delete_last_line=delete_line)
                    assert isinstance(result, str)
                    if add_dots and len(result) > 0:
                        assert result.endswith(self.TRUNCATED_SUFFIX) or result == text

    def test_num_output_chars_zero_scenario(self):
        """Test scenario where num_output_chars becomes zero or negative."""
        text = "Short"

        with self._patch_encoder(1000):  # Many tokens for short text
            result = clip_tokens(text, 1)

        # When num_output_chars is 0 or negative, should return empty string
        assert result == ""

    def test_logging_on_exception(self):
        """Test that exceptions are properly logged."""
        text = "Test text"

        # Patch the logger at the module level where it's imported
        with patch('pr_agent.algo.utils.get_logger') as mock_logger:
            mock_log_instance = MagicMock()
            mock_logger.return_value = mock_log_instance

            with patch.object(TokenEncoder, 'get_token_encoder', side_effect=Exception("Test exception")):
                result = clip_tokens(text, 10)

            # Should log the warning
            mock_log_instance.warning.assert_called_once()
            # Should return original text
            assert result == text

    def test_factor_safety_calculation(self):
        """Test that the 0.9 factor (10% reduction) works correctly."""
        text = "Test text that should be reduced by 10 percent for safety"
        max_tokens = 10
        num_tokens = 20

        with self._patch_encoder(num_tokens):
            result = clip_tokens(text, max_tokens)

        # Characters per token = len(text) / num_tokens
        # Expected chars = int(0.9 * chars_per_token * max_tokens)
        expected_chars = int(0.9 * (len(text) / num_tokens) * max_tokens)

        # The text is over the limit, so it must have been truncated. Asserting this
        # unconditionally keeps the length check below from being skipped silently.
        assert result.endswith(self.TRUNCATED_SUFFIX)

        # Result should be around expected_chars length (plus truncation text)
        actual_content = result[:-len(self.TRUNCATED_SUFFIX)]
        assert len(actual_content) <= expected_chars + 5  # Some tolerance

    # Test the original basic functionality to ensure backward compatibility
    def test_clip_original_functionality(self):
        """Test original functionality from the existing test."""
        text = "line1\nline2\nline3\nline4\nline5\nline6"
        assert clip_tokens(text, 25) == text

        expected_results = 'line1\nline2\nline3\n\n...(truncated)'
        assert clip_tokens(text, 10) == expected_results

================================================
FILE: tests/unittest/test_codecommit_client.py
================================================
from unittest.mock import MagicMock

from pr_agent.git_providers.codecommit_client import CodeCommitClient


class TestCodeCommitProvider:
    """Tests for CodeCommitClient with its boto client replaced by a MagicMock."""

    def test_get_differences(self):
        """get_differences() should expose each paginated difference's blob paths, ids and change type."""

        def blob(path, blob_id):
            return {"path": path, "blobId": blob_id}

        # One row per difference: (before path, before id, after path, after id, change type)
        rows = [
            ("file1.py", "291b15c3ab4219e43a5f4f9091e5a97ee9d7400b",
             "file1.py", "46ad86582da03cc34c804c24b17976571bca1eba", "M"),
            ("", "",
             "file2.py", "2404c7874fcbd684d6779c1420072f088647fd79", "A"),
            ("file3.py", "9af7989045ce40e9478ebb8089dfbadac19a9cde",
             "", "", "D"),
            ("file5.py", "738e36eec120ef9d6393a149252698f49156d5b4",
             "file6.py", "faecdb85f7ba199df927a783b261378a1baeca85", "R"),
        ]

        # Client whose boto client is a mock
        client = CodeCommitClient()
        client.boto_client = MagicMock()

        # The mocked paginator yields a single page holding all four differences
        single_page = {
            "differences": [
                {
                    "beforeBlob": blob(before_path, before_id),
                    "afterBlob": blob(after_path, after_id),
                    "changeType": change_type,
                }
                for before_path, before_id, after_path, after_id, change_type in rows
            ]
        }
        client.boto_client.get_paginator.return_value.paginate.return_value = [single_page]

        diffs = client.get_differences("my_test_repo", "commit1", "commit2")

        assert len(diffs) == 4
        for diff, (before_path, before_id, after_path, after_id, change_type) in zip(diffs, rows):
            assert diff.before_blob_path == before_path
            assert diff.before_blob_id == before_id
            assert diff.after_blob_path == after_path
            assert diff.after_blob_id == after_id
            assert diff.change_type == change_type

    def test_get_file(self):
        """get_file() should return the content bytes carried in the mocked get_file response."""
        file_text = "boto3==1.28.25\ndynaconf==3.1.12\nfastapi==0.99.0\nPyGithub==1.59.*\n"
        file_bytes = b"boto3==1.28.25\ndynaconf==3.1.12\nfastapi==0.99.0\nPyGithub==1.59.*\n"

        # Client whose boto client is a mock
        client = CodeCommitClient()
        client.boto_client = MagicMock()

        # Mocked response of the boto client's get_file call
        client.boto_client.get_file.return_value = {
            "commitId": "6335d6d4496e8d50af559560997604bb03abc122",
            "blobId": "c172209495d7968a8fdad76469564fb708460bc1",
            "filePath": "requirements.txt",
            "fileSize": 65,
            "fileContent": file_bytes,
        }

        # Arguments are passed positionally: repo name, file path, sha hash
        content = client.get_file(
            "my_test_repo",
            "requirements.txt",
            "84114a356ece1e5b7637213c8e486fea7c254656",
        )

        assert len(content) == 65
        assert content == file_bytes
        assert content.decode("utf-8") == file_text

    def test_get_pr(self):
        """get_pr() should map the mocked get_pull_request response onto title, description and targets."""
        # Client whose boto client is a mock
        client = CodeCommitClient()
        client.boto_client = MagicMock()

        # Mocked response of the boto client's get_pull_request call
        only_target = {
            "sourceCommit": "commit1",
            "sourceReference": "branch1",
            "destinationCommit": "commit2",
            "destinationReference": "branch2",
            "repositoryName": "my_test_repo",
        }
        client.boto_client.get_pull_request.return_value = {
            "pullRequest": {
                "pullRequestId": "321",
                "title": "My PR",
                "description": "My PR description",
                "pullRequestTargets": [only_target],
            }
        }

        pull_request = client.get_pr("my_test_repo", 321)

        assert pull_request.title == "My PR"
        assert pull_request.description == "My PR description"
        assert len(pull_request.targets) == 1

        target = pull_request.targets[0]
        assert target.source_commit == "commit1"
        assert target.source_branch == "branch1"
        assert target.destination_commit == "commit2"
        assert target.destination_branch == "branch2"


================================================
FILE: tests/unittest/test_codecommit_provider.py
================================================
from unittest.mock import patch

import pytest

from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo
from pr_agent.git_providers.codecommit_provider import CodeCommitFile, CodeCommitProvider, PullRequestCCMimic


class TestCodeCommitFile:
    # Verifies that CodeCommitFile stores its constructor arguments and reports
    # the "b" path as its filename.
    # Generated by CodiumAI
    def test_valid_parameters(self):
        code_file = CodeCommitFile("path/to/file_a", "12345", "path/to/file_b", "67890", EDIT_TYPE.ADDED)

        expected_attributes = {
            "a_path": "path/to/file_a",
            "a_blob_id": "12345",
            "b_path": "path/to/file_b",
            "b_blob_id": "67890",
            "edit_type": EDIT_TYPE.ADDED,
            "filename": "path/to/file_b",
        }
        for attribute, wanted in expected_attributes.items():
            assert getattr(code_file, attribute) == wanted


class TestCodeCommitProvider:
    """Unit tests for CodeCommitProvider helpers.

    Tests that need a provider instance without running its real constructor
    patch ``__init__`` with a no-op lambda.
    """

    def test_get_title(self):
        # Test that the get_title() function returns the PR title
        with patch.object(CodeCommitProvider, "__init__", lambda x, y: None):
            provider = CodeCommitProvider(None)
            provider.pr = PullRequestCCMimic("My Test PR Title", [])
            assert provider.get_title() == "My Test PR Title"

    def test_get_pr_id(self):
        # Test that the get_pr_id() function returns the correct ID
        with patch.object(CodeCommitProvider, "__init__", lambda x, y: None):
            provider = CodeCommitProvider(None)
            provider.repo_name = "my_test_repo"
            provider.pr_num = 321
            assert provider.get_pr_id() == "my_test_repo/321"

    def test_parse_pr_url(self):
        # Test that the _parse_pr_url() function can extract the repo name and PR number from a CodeCommit URL
        url = "https://us-east-1.console.aws.amazon.com/codesuite/codecommit/repositories/my_test_repo/pull-requests/321"
        repo_name, pr_number = CodeCommitProvider._parse_pr_url(url)
        assert repo_name == "my_test_repo"
        assert pr_number == 321

    def test_is_valid_codecommit_hostname(self):
        # Test the various AWS regions
        aws_regions = [
            "af-south-1",
            "ap-east-1",
            "ap-northeast-1",
            "ap-northeast-2",
            "ap-northeast-3",
            "ap-south-1",
            "ap-south-2",
            "ap-southeast-1",
            "ap-southeast-2",
            "ap-southeast-3",
            "ap-southeast-4",
            "ca-central-1",
            "eu-central-1",
            "eu-central-2",
            "eu-north-1",
            "eu-south-1",
            "eu-south-2",
            "eu-west-1",
            "eu-west-2",
            "eu-west-3",
            "il-central-1",
            "me-central-1",
            "me-south-1",
            "sa-east-1",
            "us-east-1",
            "us-east-2",
            "us-gov-east-1",
            "us-gov-west-1",
            "us-west-1",
            "us-west-2",
        ]
        for region in aws_regions:
            hostname = f"{region}.console.aws.amazon.com"
            # The message pinpoints which region failed instead of only a line number
            assert CodeCommitProvider._is_valid_codecommit_hostname(hostname), hostname

        # Test non-AWS regions
        for hostname in ("no-such-region.console.aws.amazon.com", "console.aws.amazon.com"):
            assert not CodeCommitProvider._is_valid_codecommit_hostname(hostname), hostname

    # Test that an error is raised when an invalid CodeCommit URL is provided to the set_pr() method of the CodeCommitProvider class.
    # Generated by CodiumAI
    def test_invalid_codecommit_url(self):
        provider = CodeCommitProvider()
        with pytest.raises(ValueError):
            provider.set_pr("https://example.com/codecommit/repositories/my_test_repo/pull-requests/4321")

    def test_get_file_extensions(self):
        # Test that each filename maps to its lower-cased extension ("" when there is none)
        filenames = [
            "app.py",
            "cli.py",
            "composer.json",
            "composer.lock",
            "hello.py",
            "image1.jpg",
            "image2.JPG",
            "index.js",
            "provider.py",
            "README",
            "test.py",
        ]
        expected_extensions = [
            ".py",
            ".py",
            ".json",
            ".lock",
            ".py",
            ".jpg",
            ".jpg",
            ".js",
            ".py",
            "",
            ".py",
        ]
        extensions = CodeCommitProvider._get_file_extensions(filenames)
        assert extensions == expected_extensions

    def test_get_language_percentages(self):
        extensions = [
            ".py",
            ".py",
            ".json",
            ".lock",
            ".py",
            ".jpg",
            ".jpg",
            ".js",
            ".py",
            "",
            ".py",
        ]
        percentages = CodeCommitProvider._get_language_percentages(extensions)
        expected = {".py": 45, ".json": 9, ".lock": 9, ".jpg": 18, ".js": 9, "": 9}
        for extension, percentage in expected.items():
            assert percentages[extension] == percentage

        # The _get_file_extensions function needs the "." prefix on the extension,
        # but the _get_language_percentages function will work with or without the "." prefix
        percentages = CodeCommitProvider._get_language_percentages(["txt", "py", "py"])
        assert percentages["py"] == 67
        assert percentages["txt"] == 33

        # test an empty list
        percentages = CodeCommitProvider._get_language_percentages([])
        assert percentages == {}

    def test_get_edit_type(self):
        # Test that the _get_edit_type() function can convert a CodeCommit letter to an EDIT_TYPE enum
        letter_to_edit_type = {
            "A": EDIT_TYPE.ADDED,
            "D": EDIT_TYPE.DELETED,
            "M": EDIT_TYPE.MODIFIED,
            "R": EDIT_TYPE.RENAMED,
        }
        for letter, edit_type in letter_to_edit_type.items():
            assert CodeCommitProvider._get_edit_type(letter) == edit_type

        # Lower-case letters are accepted as well
        for letter, edit_type in letter_to_edit_type.items():
            assert CodeCommitProvider._get_edit_type(letter.lower()) == edit_type

        # Unknown letters yield None
        assert CodeCommitProvider._get_edit_type("X") is None

    def test_add_additional_newlines(self):
        # NOTE: locals are deliberately not called `input`, which would shadow the builtin.
        # a short string to test adding double newlines
        text = "abc\ndef\n\n___\nghi\njkl\nmno\n\npqr\n"
        expect = "abc\n\ndef\n\n___\n\nghi\n\njkl\n\nmno\n\npqr\n\n"
        assert CodeCommitProvider._add_additional_newlines(text) == expect
        # a test example from a real PR
        text = "## PR Type:\nEnhancement\n\n___\n## PR Description:\nThis PR introduces a new feature to the script, allowing users to filter servers by name.\n\n___\n## PR Main Files Walkthrough:\n`foo`: The foo script has been updated to include a new command line option `-f` or `--filter`.\n`bar`: The bar script has been updated to list stopped servers.\n"
        expect = "## PR Type:\n\nEnhancement\n\n___\n\n## PR Description:\n\nThis PR introduces a new feature to the script, allowing users to filter servers by name.\n\n___\n\n## PR Main Files Walkthrough:\n\n`foo`: The foo script has been updated to include a new command line option `-f` or `--filter`.\n\n`bar`: The bar script has been updated to list stopped servers.\n\n"
        assert CodeCommitProvider._add_additional_newlines(text) == expect

    def test_remove_markdown_html(self):
        # Test that the HTML tags in the markdown are stripped while their inner text is kept
        text = "## PR Feedback\n<details><summary>Code feedback:</summary>\nfile foo\n</summary>\n"
        expect = "## PR Feedback\nCode feedback:\nfile foo\n\n"
        assert CodeCommitProvider._remove_markdown_html(text) == expect


================================================
FILE: tests/unittest/test_config_loader_secrets.py
================================================
from unittest.mock import MagicMock, patch

from pr_agent.config_loader import apply_secrets_manager_config, apply_secrets_to_config


class TestConfigLoaderSecrets:
    """Tests for apply_secrets_manager_config and apply_secrets_to_config.

    The secret provider factory and ``get_settings`` are patched, so no real
    secret backend or configuration is touched.
    """

    @staticmethod
    def _settings_returning(value):
        """Build a settings mock whose ``get`` always returns ``value``.

        ``set`` needs no explicit setup: MagicMock creates it on first access and
        records calls to it.
        """
        settings = MagicMock()
        settings.get.return_value = value
        return settings

    def test_apply_secrets_manager_config_success(self):
        with patch('pr_agent.secret_providers.get_secret_provider') as mock_get_provider, \
             patch('pr_agent.config_loader.apply_secrets_to_config') as mock_apply_secrets, \
             patch('pr_agent.config_loader.get_settings') as mock_get_settings:

            # Mock secret provider
            mock_provider = MagicMock()
            mock_provider.get_all_secrets.return_value = {'openai.key': 'sk-test'}
            mock_get_provider.return_value = mock_provider

            # Mock settings
            mock_get_settings.return_value = self._settings_returning("aws_secrets_manager")

            apply_secrets_manager_config()

            mock_apply_secrets.assert_called_once_with({'openai.key': 'sk-test'})

    def test_apply_secrets_manager_config_no_provider(self):
        with patch('pr_agent.secret_providers.get_secret_provider') as mock_get_provider:
            mock_get_provider.return_value = None

            # Confirm no exception is raised
            apply_secrets_manager_config()

    def test_apply_secrets_manager_config_not_aws(self):
        with patch('pr_agent.secret_providers.get_secret_provider') as mock_get_provider, \
             patch('pr_agent.config_loader.get_settings') as mock_get_settings:

            # Mock Google Cloud Storage provider
            mock_provider = MagicMock()
            mock_get_provider.return_value = mock_provider

            # Mock settings (Google Cloud Storage)
            mock_get_settings.return_value = self._settings_returning("google_cloud_storage")

            # Confirm execution is skipped for non-AWS Secrets Manager
            apply_secrets_manager_config()

            # Confirm get_all_secrets is not called. A hasattr() guard would be dead code
            # here: a MagicMock reports every (non-dunder) attribute as present.
            mock_provider.get_all_secrets.assert_not_called()

    def test_apply_secrets_to_config_nested_keys(self):
        with patch('pr_agent.config_loader.get_settings') as mock_get_settings:
            settings = self._settings_returning(None)  # No existing value
            mock_get_settings.return_value = settings

            secrets = {
                'openai.key': 'sk-test',
                'github.webhook_secret': 'webhook-secret'
            }

            apply_secrets_to_config(secrets)

            # Confirm settings are applied correctly
            settings.set.assert_any_call('OPENAI.KEY', 'sk-test')
            settings.set.assert_any_call('GITHUB.WEBHOOK_SECRET', 'webhook-secret')

    def test_apply_secrets_to_config_existing_value_preserved(self):
        with patch('pr_agent.config_loader.get_settings') as mock_get_settings:
            settings = self._settings_returning("existing-value")  # Existing value present
            mock_get_settings.return_value = settings

            apply_secrets_to_config({'openai.key': 'sk-test'})

            # Confirm settings are not overridden when existing value present
            settings.set.assert_not_called()

    def test_apply_secrets_to_config_single_key(self):
        with patch('pr_agent.config_loader.get_settings') as mock_get_settings:
            settings = self._settings_returning(None)
            mock_get_settings.return_value = settings

            apply_secrets_to_config({'simple_key': 'simple_value'})

            # Confirm non-dot notation keys are ignored
            settings.set.assert_not_called()

    def test_apply_secrets_to_config_multiple_dots(self):
        with patch('pr_agent.config_loader.get_settings') as mock_get_settings:
            settings = self._settings_returning(None)
            mock_get_settings.return_value = settings

            apply_secrets_to_config({'section.subsection.key': 'value'})

            # Confirm keys with multiple dots are ignored
            settings.set.assert_not_called()

    def test_apply_secrets_manager_config_exception_handling(self):
        with patch('pr_agent.secret_providers.get_secret_provider') as mock_get_provider:
            mock_get_provider.side_effect = Exception("Provider error")

            # Confirm processing continues even when exception occurs
            apply_secrets_manager_config()  # Confirm no exception is raised


================================================
FILE: tests/unittest/test_convert_to_markdown.py
================================================
# Generated by CodiumAI
import textwrap
from unittest.mock import Mock

from pr_agent.algo.utils import PRReviewHeader, convert_to_markdown_v2
from pr_agent.tools.pr_description import insert_br_after_x_chars

"""
Code Analysis

Objective:
The objective of the 'convert_to_markdown' function is to convert a dictionary of data into a markdown-formatted text.
The function takes in a dictionary as input and recursively iterates through its keys and values to generate the
markdown text.

Inputs:
- A dictionary of data containing information about a pull request.

Flow:
- Initialize an empty string variable 'markdown_text'.
- Create a dictionary 'emojis' containing emojis for each key in the input dictionary.
- Iterate through the input dictionary:
  - If the value is empty, continue to the next iteration.
  - If the value is a dictionary, recursively call the 'convert_to_markdown' function with the value as input and
  append the returned markdown text to 'markdown_text'.
  - If the value is a list:
    - If the key is 'code suggestions', add an additional line break to 'markdown_text'.
    - Get the corresponding emoji for the key from the 'emojis' dictionary. If no emoji is found, use a dash.
    - Append the emoji and key to 'markdown_text'.
    - Iterate through the items in the list:
      - If the item is a dictionary and the key is 'code suggestions', call the 'parse_code_suggestion' function with
      the item as input and append the returned markdown text to 'markdown_text'.
      - If the item is not empty, append it to 'markdown_text'.
  - If the value is not 'n/a', get the corresponding emoji for the key from the 'emojis' dictionary. If no emoji is
  found, use a dash. Append the emoji, key, and value to 'markdown_text'.
- Return 'markdown_text'.

Outputs:
- A markdown-formatted string containing the information from the input dictionary.

Additional aspects:
- The function uses recursion to handle nested dictionaries.
- The 'parse_code_suggestion' function is called for items in the 'code suggestions' list.
- The function uses emojis to add visual cues to the markdown text.
"""


class TestConvertToMarkdown:
    """Tests for `convert_to_markdown_v2`.

    Each test feeds a hand-built review dictionary to the function and compares the
    result, after stripping surrounding whitespace, with a literal expected string.
    """

    # Tests that the function works correctly with a simple dictionary input
    def test_simple_dictionary_input(self):
        """Effort, tests, issues and security fields rendered with default arguments (HTML table)."""
        input_data = {'review': {
            'estimated_effort_to_review_[1-5]': '1, because the changes are minimal and straightforward, focusing on a single functionality addition.\n',
            'relevant_tests': 'No\n', 'possible_issues': 'No\n', 'security_concerns': 'No\n'}}

        expected_output = textwrap.dedent(f"""\
            {PRReviewHeader.REGULAR.value} 🔍

            Here are some key observations to aid the review process:

            <table>
            <tr><td>⏱️&nbsp;<strong>Estimated effort to review</strong>: 1 🔵⚪⚪⚪⚪</td></tr>
            <tr><td>🧪&nbsp;<strong>No relevant tests</strong></td></tr>
            <tr><td>&nbsp;<strong>Possible issues</strong>: No
            </td></tr>
            <tr><td>🔒&nbsp;<strong>No security concerns identified</strong></td></tr>
            </table>
        """)

        assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()

    def test_simple_dictionary_input_without_gfm_supported(self):
        """Same input as the simple case, rendered with `gfm_supported=False` (markdown headings)."""
        input_data = {'review': {
            'estimated_effort_to_review_[1-5]': '1, because the changes are minimal and straightforward, focusing on a single functionality addition.\n',
            'relevant_tests': 'No\n', 'possible_issues': 'No\n', 'security_concerns': 'No\n'}}

        expected_output = textwrap.dedent("""\
            ## PR Reviewer Guide 🔍

            Here are some key observations to aid the review process:

            ### ⏱️ Estimated effort to review: 1 🔵⚪⚪⚪⚪

            ### 🧪 No relevant tests

            ###  Possible issues: No


            ### 🔒 No security concerns identified
        """)

        assert convert_to_markdown_v2(input_data, gfm_supported=False).strip() == expected_output.strip()

    def test_key_issues_to_review(self):
        """A key issue is rendered as a link whose target comes from the git provider's `get_line_link`."""
        input_data = {'review': {
            'key_issues_to_review': [
                {
                    'relevant_file': 'src/utils.py',
                    'issue_header': 'Code Smell',
                    'issue_content': 'The function is too long and complex.',
                    'start_line': 30,
                    'end_line': 50,
                }
            ]
        }}
        # The provider is mocked so the link target is fully controlled by the test
        mock_git_provider = Mock()
        reference_link = 'https://github.com/qodo/pr-agent/pull/1/files#diff-hashvalue-R174'
        mock_git_provider.get_line_link.return_value = reference_link

        expected_output = textwrap.dedent(f"""\
            ## PR Reviewer Guide 🔍

            Here are some key observations to aid the review process:

            <table>
            <tr><td>⚡&nbsp;<strong>Recommended focus areas for review</strong><br><br>

            <a href='{reference_link}'><strong>Code Smell</strong></a><br>The function is too long and complex.

            </td></tr>
            </table>
        """)

        assert convert_to_markdown_v2(input_data, git_provider=mock_git_provider).strip() == expected_output.strip()
        # The link must have been requested for the issue's file and line range
        mock_git_provider.get_line_link.assert_called_with('src/utils.py', 30, 50)

    def test_ticket_compliance(self):
        """A ticket whose requirements are all in the compliant list is rendered as 'Fully compliant'."""
        input_data = {'review': {
            'ticket_compliance_check': [
                {
                    'ticket_url': 'https://example.com/ticket/123',
                    'ticket_requirements': '- Requirement 1\n- Requirement 2\n',
                    'fully_compliant_requirements': '- Requirement 1\n- Requirement 2\n',
                    'not_compliant_requirements': '',
                    'requires_further_human_verification': '',
                }
            ]
        }}

        expected_output = textwrap.dedent("""\
            ## PR Reviewer Guide 🔍

            Here are some key observations to aid the review process:

            <table>
            <tr><td>

            **🎫 Ticket compliance analysis ✅**


            **[123](https://example.com/ticket/123) - Fully compliant**

            Compliant requirements:

            - Requirement 1
            - Requirement 2


            </td></tr>
            </table>
        """)

        assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()

    def test_can_be_split(self):
        """Each `can_be_split` entry is rendered as a collapsible sub-PR theme listing its files."""
        input_data = {'review': {
            'can_be_split': [
                {
                    'relevant_files': [
                        'src/file1.py',
                        'src/file2.py'
                    ],
                    'title': 'Refactoring',
                },
                {
                    'relevant_files': [
                        'src/file3.py'
                    ],
                    'title': 'Bug Fix',
                }
            ]
        }
        }

        expected_output = textwrap.dedent("""\
            ## PR Reviewer Guide 🔍

            Here are some key observations to aid the review process:

            <table>
            <tr><td>🔀 <strong>Multiple PR themes</strong><br><br>

            <details><summary>
            Sub-PR theme: <b>Refactoring</b></summary>

            ___

            Relevant files:

            - src/file1.py
            - src/file2.py
            ___

            </details>

            <details><summary>
            Sub-PR theme: <b>Bug Fix</b></summary>

            ___

            Relevant files:

            - src/file3.py
            ___

            </details>

            </td></tr>
            </table>
        """)

        assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()

    def test_contribution_time_cost_estimate(self):
        """Best/average/worst estimates are rendered on one line, in both GFM and non-GFM modes.

        The expected strings show the '30m' input rendered as '30 minutes', while '1h'
        and '2h' are rendered unchanged.
        """
        input_data = {
            'review': {
                'contribution_time_cost_estimate': {
                    'best_case': '1h',
                    'average_case': '2h',
                    'worst_case': '30m',
                }
            }
        }

        # The leading newline of this literal is removed by the .strip() in the assertion
        expected_output = textwrap.dedent(f"""
            {PRReviewHeader.REGULAR.value} 🔍

            Here are some key observations to aid the review process:

            <table>
            <tr><td>⏳&nbsp;<strong>Contribution time estimate</strong> (best, average, worst case): 1h | 2h | 30 minutes</td></tr>
            </table>
        """)
        assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()

        # Non-GFM branch
        expected_output_no_gfm = textwrap.dedent(f"""
        {PRReviewHeader.REGULAR.value} 🔍

        Here are some key observations to aid the review process:

        ### ⏳ Contribution time estimate (best, average, worst case): 1h | 2h | 30 minutes

        """)
        assert convert_to_markdown_v2(input_data, gfm_supported=False).strip() == expected_output_no_gfm.strip()


    # Tests that the function works correctly with an empty dictionary input
    def test_empty_dictionary_input(self):
        """An empty input dictionary yields an empty string (after stripping)."""
        input_data = {}

        expected_output = ''

        assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()

    def test_dictionary_with_empty_dictionaries(self):
        """A 'review' key mapped to an empty dictionary also yields an empty string (after stripping)."""
        input_data = {'review': {}}

        expected_output = ''

        assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()


class TestBR:
    """Checks the HTML produced by `insert_br_after_x_chars` for three sample descriptions."""

    def test_br1(self):
        """Bulleted description with several inline-code spans."""
        description = '- Imported `FilePatchInfo` and `EDIT_TYPE` from `pr_agent.algo.types` instead of `pr_agent.git_providers.git_provider`.'
        rendered = insert_br_after_x_chars(description)
        wanted = (
            '<ul><li>Imported <code>FilePatchInfo</code> and <code>EDIT_TYPE</code> from '
            '<code>pr_agent.algo.types</code> instead <br>of '
            '<code>pr_agent.git_providers.git_provider</code>.</ul>'
        )
        assert rendered == wanted

    def test_br2(self):
        """Bulleted description whose single inline-code span is long enough to be broken up."""
        description = (
            '- Created a - new -class `ColorPaletteResourcesCollection ColorPaletteResourcesCollection '
            'ColorPaletteResourcesCollection ColorPaletteResourcesCollection`'
        )
        rendered = insert_br_after_x_chars(description)
        wanted = (
            '<ul><li>Created a - new -class <code>ColorPaletteResourcesCollection </code><br><code>'
            'ColorPaletteResourcesCollection ColorPaletteResourcesCollection '
            '</code><br><code>ColorPaletteResourcesCollection</code></ul>'
        )
        assert rendered == wanted

    def test_br3(self):
        """Plain (non-bulleted) description containing inline code with angle brackets."""
        description = 'Created a new class `ColorPaletteResourcesCollection` which extends `AvaloniaDictionary<ThemeVariant, ColorPaletteResources>` and implements aaa'
        rendered = insert_br_after_x_chars(description)
        wanted = (
            'Created a new class <code>ColorPaletteResourcesCollection</code> which '
            'extends <br><code>AvaloniaDictionary<ThemeVariant, ColorPaletteResources>'
            '</code> and implements <br>aaa'
        )
        assert rendered == wanted


================================================
FILE: tests/unittest/test_delete_hunks.py
================================================
# Generated by CodiumAI

from pr_agent.algo.git_patch_processing import omit_deletion_hunks

"""
Code Analysis

Objective:
The objective of the "omit_deletion_hunks" function is to remove deletion hunks from a patch file and return only the
added lines.

Inputs:
- "patch_lines": a list of strings representing the lines of a patch file.

Flow:
- Initialize empty lists "temp_hunk" and "added_patched", and boolean variables "add_hunk" and "inside_hunk".
- Compile a regular expression pattern to match hunk headers.
- Iterate through each line in "patch_lines".
- If the line starts with "@@", match the line with the hunk header pattern, finish the previous hunk if necessary,
and append the line to "temp_hunk".
- If the line does not start with "@@", append the line to "temp_hunk", check if it is an added line, and set
"add_hunk" to True if it is.
- If the function reaches the end of "patch_lines" and there is an unfinished hunk with added lines, append it to
"added_patched".
- Join the lines in "added_patched" with newline characters and return the resulting string.

Outputs:
- A string representing the added lines in the patch file.

Additional aspects:
- The function only considers hunks with added lines and ignores hunks with deleted lines.
- The function assumes that the input patch file is well-formed and follows the unified diff format.
"""


class TestOmitDeletionHunks:
    """Checks `omit_deletion_hunks` against small hand-written patches."""

    def test_simple_patch_additions(self):
        """A patch consisting of one addition-only hunk is returned in full."""
        addition_hunk = ['@@ -1,0 +1,1 @@\n', '+added line\n']
        assert omit_deletion_hunks(addition_hunk) == '@@ -1,0 +1,1 @@\n\n+added line\n'

    def test_patch_multiple_hunks(self):
        """The hunk that contains an addition is kept; the deletion-only hunk is dropped."""
        hunk_with_addition = ['@@ -1,0 +1,1 @@\n', '-deleted line', '+added line\n']
        hunk_deletions_only = ['@@ -2,0 +3,1 @@\n', '-deleted line\n', '-another deleted line\n']
        result = omit_deletion_hunks(hunk_with_addition + hunk_deletions_only)
        assert result == '@@ -1,0 +1,1 @@\n\n-deleted line\n+added line\n'

    def test_patch_only_deletions(self):
        """Hunks without any added line produce an empty result, with or without context lines."""
        header = '@@ -1,1 +1,0 @@\n'
        deletion_only_patches = [
            # single deletion
            [header, '-deleted line\n'],
            # additional deletion lines
            [header, '-deleted line\n', '-another deleted line\n'],
            # additional context lines
            [header, '-deleted line\n', '-another deleted line\n', 'context line 1\n',
             'context line 2\n', 'context line 3\n'],
        ]
        for lines in deletion_only_patches:
            assert omit_deletion_hunks(lines) == ''

    def test_empty_patch(self):
        """An empty list of patch lines produces an empty string."""
        assert omit_deletion_hunks([]) == ''

    def test_patch_one_hunk(self):
        """A patch with exactly one hunk that adds a line is returned in full."""
        single_hunk = ['@@ -1,0 +1,1 @@\n', '+added line\n']
        result = omit_deletion_hunks(single_hunk)
        assert result == '@@ -1,0 +1,1 @@\n\n+added line\n'

    def test_patch_deletions_no_additions(self):
        """A single hunk holding only a deletion produces an empty string."""
        deletion_hunk = ['@@ -1,1 +1,0 @@\n', '-deleted line\n']
        result = omit_deletion_hunks(deletion_hunk)
        assert result == ''


================================================
FILE: tests/unittest/test_extend_patch.py
================================================
import pytest

from pr_agent.algo.git_patch_processing import extend_patch
from pr_agent.algo.pr_processing import pr_generate_extended_diff
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import load_large_diff
from pr_agent.config_loader import get_settings

# Module-level setup, executed when this test module is imported: turn on
# CONFIG.CLI_MODE and turn off dynamic context in the settings object returned by
# get_settings(use_context=False) — presumably so the tests below start from a
# known configuration.
get_settings(use_context=False).set("CONFIG.CLI_MODE", True)
get_settings(use_context=False).config.allow_dynamic_context = False


class TestExtendPatch:
    """Tests for `extend_patch`.

    Tests that change global settings save the previous values first and restore
    them in a `finally` block, so a failing assertion cannot leak configuration
    into tests that run afterwards.
    """

    # Tests that the function works correctly with valid input
    def test_happy_path(self):
        """One extra line before and after widens the hunk from `-2,2` to `-1,4`."""
        original_file_str = 'line1\nline2\nline3\nline4\nline5'
        patch_str = '@@ -2,2 +2,2 @@ init()\n-line2\n+new_line2\n line3'
        num_lines = 1
        expected_output = '\n@@ -1,4 +1,4 @@ init()\n line1\n-line2\n+new_line2\n line3\n line4'
        actual_output = extend_patch(original_file_str, patch_str,
                                     patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines)
        assert actual_output == expected_output

    # Tests that the function returns an empty string when patch_str is empty
    def test_empty_patch(self):
        """An empty patch string is returned as an empty string."""
        original_file_str = 'line1\nline2\nline3\nline4\nline5'
        patch_str = ''
        num_lines = 1
        expected_output = ''
        assert extend_patch(original_file_str, patch_str,
                            patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines) == expected_output

    # Tests that the function returns the original patch when num_lines is 0
    def test_zero_num_lines(self):
        """With zero extra lines requested, the patch is returned unchanged."""
        original_file_str = 'line1\nline2\nline3\nline4\nline5'
        patch_str = '@@ -2,2 +2,2 @@ init()\n-line2\n+new_line2\nline3'
        num_lines = 0
        assert extend_patch(original_file_str, patch_str,
                            patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines) == patch_str

    # Tests that the function returns the original patch when patch_str contains no hunks
    def test_no_hunks(self):
        """A patch string without any hunk header is returned unchanged."""
        original_file_str = 'line1\nline2\nline3\nline4\nline5'
        patch_str = 'no hunks here'
        num_lines = 1
        expected_output = 'no hunks here'
        # NOTE(review): num_lines is passed positionally here, unlike the other tests
        assert extend_patch(original_file_str, patch_str, num_lines) == expected_output

    # Tests that the function extends a patch with a single hunk correctly
    def test_single_hunk(self):
        """Requesting more context than the file has still yields the whole file as context."""
        original_file_str = 'line1\nline2\nline3\nline4\nline5'
        patch_str = '@@ -2,3 +2,3 @@ init()\n-line2\n+new_line2\n line3\n line4'

        for num_lines in [1, 2, 3]: # check that even if we are over the number of lines in the file, the function still works
            expected_output = '\n@@ -1,5 +1,5 @@ init()\n line1\n-line2\n+new_line2\n line3\n line4\n line5'
            actual_output = extend_patch(original_file_str, patch_str,
                                         patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines)
            assert actual_output == expected_output

    # Tests the functionality of extending a patch with multiple hunks.
    def test_multiple_hunks(self):
        """Two hunks are extended independently; the result is the same with dynamic context off and on."""
        original_file_str = 'line1\nline2\nline3\nline4\nline5\nline6'
        patch_str = '@@ -2,3 +2,3 @@ init()\n-line2\n+new_line2\n line3\n line4\n@@ -4,1 +4,1 @@ init2()\n-line4\n+new_line4'  # noqa: E501
        num_lines = 1
        expected_output = '\n@@ -1,5 +1,5 @@ init()\n line1\n-line2\n+new_line2\n line3\n line4\n line5\n\n@@ -3,3 +3,3 @@ init2()\n line3\n-line4\n+new_line4\n line5' # noqa: E501
        original_allow_dynamic_context = get_settings(use_context=False).config.allow_dynamic_context

        try:
            # No input here was given new_file_str, and the expected output is identical
            # whether dynamic context is disabled or enabled.
            for allow_dynamic_context in (False, True):
                get_settings(use_context=False).config.allow_dynamic_context = allow_dynamic_context
                actual_output = extend_patch(original_file_str, patch_str,
                                             patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines)
                assert actual_output == expected_output
        finally:
            # Restore even when an assertion fails, so other tests see the original value
            get_settings(use_context=False).config.allow_dynamic_context = original_allow_dynamic_context

    def test_dynamic_context(self):
        """With dynamic context on, the hunk grows back to `def foo():`; with it off, only N lines are added."""
        # Remember the global settings this test overrides. The fallback of 10 equals
        # the value this test sets, so a missing key ends up exactly as it did before.
        original_allow_dynamic_context = get_settings(use_context=False).config.allow_dynamic_context
        original_max_extra_lines = get_settings(use_context=False).get(
            "config.max_extra_lines_before_dynamic_context", 10)

        try:
            get_settings(use_context=False).config.max_extra_lines_before_dynamic_context = 10
            original_file_str = "def foo():"
            for i in range(9):
                original_file_str += f"\n    line({i})"
            patch_str ="@@ -10,1 +10,1 @@ def foo():\n-    line(8)\n+    new_line(8)"
            new_file_str = "\n".join(original_file_str.splitlines()[:-1] + ["    new_line(8)"])
            num_lines=1

            get_settings(use_context=False).config.allow_dynamic_context = True
            actual_output = extend_patch(original_file_str, patch_str,
                                         patch_extra_lines_before=num_lines, patch_extra_lines_after=num_lines, new_file_str=new_file_str)
            expected_output='\n@@ -1,10 +1,10 @@ \n def foo():\n     line(0)\n     line(1)\n     line(2)\n     line(3)\n     line(4)\n     line(5)\n     line(6)\n     line(7)\n-    line(8)\n+    new_line(8)'
            assert actual_output == expected_output

            # Dynamic context off: exactly one extra line before the change
            get_settings(use_context=False).config.allow_dynamic_context = False
            actual_output2 = extend_patch(original_file_str, patch_str,
                                         patch_extra_lines_before=1, patch_extra_lines_after=1)
            expected_output_no_dynamic_context = '\n@@ -9,2 +9,2 @@ def foo():\n     line(7)\n-    line(8)\n+    new_line(8)'
            assert actual_output2 == expected_output_no_dynamic_context

            # Dynamic context still off: three extra lines before the change
            actual_output3 = extend_patch(original_file_str, patch_str,
                                         patch_extra_lines_before=3, patch_extra_lines_after=3)
            expected_output_no_dynamic_context = '\n@@ -7,4 +7,4 @@ def foo():\n     line(5)\n     line(6)\n     line(7)\n-    line(8)\n+    new_line(8)'
            assert actual_output3 == expected_output_no_dynamic_context
        finally:
            # Put both settings back, whether the assertions passed or not
            get_settings(use_context=False).config.allow_dynamic_context = original_allow_dynamic_context
            get_settings(use_context=False).config.max_extra_lines_before_dynamic_context = original_max_extra_lines


class TestExtendedPatchMoreLines:
    """Exercises `pr_generate_extended_diff` on two small in-memory files."""

    class File:
        """Minimal stand-in carrying the file attributes assigned below."""

        def __init__(self, base_file, patch, head_file, filename, ai_file_summary=None):
            self.base_file, self.patch, self.head_file = base_file, patch, head_file
            self.filename = filename
            self.ai_file_summary = ai_file_summary

    @pytest.fixture
    def token_handler(self):
        """A TokenHandler built from dummy prompts, with its prompt token count forced to 100."""
        handler = TokenHandler(system="System prompt", user="User prompt")
        handler.prompt_tokens = 100
        return handler

    @pytest.fixture
    def pr_languages(self):
        """A single language group containing two files with base content, head content and a patch."""
        first_file = self.File(
            base_file="line000\nline00\nline0\nline1\noriginal content\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9\nline10",
            patch="@@ -5,5 +5,5 @@\n-original content\n+modified content\n line2\n line3\n line4\n line5",
            head_file="line000\nline00\nline0\nline1\nmodified content\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9\nline10",
            filename="file1",
        )
        second_file = self.File(
            base_file="original content\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nline9\nline10",
            patch="@@ -6,5 +6,5 @@\nline6\nline7\nline8\n-line9\n+modified line9\nline10",
            head_file="original content\nline2\nline3\nline4\nline5\nline6\nline7\nline8\nmodified line9\nline10",
            filename="file2",
        )
        return [{'files': [first_file, second_file]}]

    def test_extend_patches_with_extra_lines(self, token_handler, pr_languages):
        """Zero extra lines leaves the patches as they are; 2 before / 1 after widens the first hunk."""
        source_files = pr_languages[0]['files']

        plain_patches, _, _ = pr_generate_extended_diff(
            pr_languages,
            token_handler,
            add_line_numbers_to_hunks=False,
            patch_extra_lines_before=0,
            patch_extra_lines_after=0,
        )

        # Without extra lines, each result is just a file header followed by the original patch
        first_plain = plain_patches[0].strip()
        second_plain = plain_patches[1].strip()
        assert first_plain == "## File: 'file1'\n\n" + source_files[0].patch.strip()
        assert second_plain == "## File: 'file2'\n\n" + source_files[1].patch.strip()

        widened_patches, _, _ = pr_generate_extended_diff(
            pr_languages,
            token_handler,
            add_line_numbers_to_hunks=False,
            patch_extra_lines_before=2,
            patch_extra_lines_after=1,
        )

        first_widened = widened_patches[0].strip()
        assert first_widened == "## File: 'file1'\n\n@@ -3,8 +3,8 @@ \n line0\n line1\n-original content\n+modified content\n line2\n line3\n line4\n line5\n line6"

class TestLoadLargeDiff:
    """Tests for `load_large_diff`, comparing its output with literal unified-diff text."""

    def test_no_newline(self):
        """Diff two multi-line strings where the second one does not end with a newline.

        In the expected diff, lines that appear only in the second content argument
        are marked '-' and lines that appear only in the first are marked '+'. The
        string literals are whitespace-sensitive (including the trailing space after
        '---' and '+++'), so they must not be re-indented.
        """
        patch = load_large_diff("test.py",
                                """\
                                old content 1
                                some new content
                                another line
                                """,
                                """
                                old content 1
                                old content 2""")

        patch_expected="""\
--- 
+++ 
@@ -1,3 +1,3 @@
-
                                 old content 1
-                                old content 2
+                                some new content
+                                another line
"""
        assert patch == patch_expected

    def test_empty_inputs(self):
        """Two empty strings or two None values give an empty result; content versus '' gives a one-line diff."""
        assert load_large_diff("test.py", "", "") == ""
        assert load_large_diff("test.py", None, None) == ""
        assert (load_large_diff("test.py", "content\n", "") ==
                '--- \n+++ \n@@ -1 +1 @@\n-\n+content\n')

================================================
FILE: tests/unittest/test_extract_issue_from_branch.py
================================================
import pytest

from pr_agent.tools.ticket_pr_compliance_check import extract_ticket_links_from_branch_name


class TestExtractTicketsLinkFromBranchName:
    """Unit tests for `extract_ticket_links_from_branch_name` (issue number at the start of a branch segment)."""

    GITHUB = "https://github.com"

    @staticmethod
    def _install_fake_settings(monkeypatch, enabled, regex):
        """Replace the module's `get_settings` with a stub answering only the two branch-related keys."""
        toggle_keys = ("extract_issue_from_branch", "config.extract_issue_from_branch")
        regex_keys = ("branch_issue_regex", "config.branch_issue_regex")

        def fake_get(key, default=None):
            if key in toggle_keys:
                return enabled
            if key in regex_keys:
                return regex
            return default

        fake_settings = type("Settings", (), {})()
        fake_settings.get = fake_get
        import pr_agent.tools.ticket_pr_compliance_check as m
        monkeypatch.setattr(m, "get_settings", lambda: fake_settings)

    def test_feature_slash_number_suffix(self):
        """`feature/1-test-issue` resolves to issue 1 of org/repo."""
        links = extract_ticket_links_from_branch_name("feature/1-test-issue", "org/repo", self.GITHUB)
        assert links == ["https://github.com/org/repo/issues/1"]

    def test_fix_slash_number_suffix(self):
        """`fix/123-bug` resolves to issue 123 of owner/repo."""
        links = extract_ticket_links_from_branch_name("fix/123-bug", "owner/repo", self.GITHUB)
        assert links == ["https://github.com/owner/repo/issues/123"]

    def test_number_at_start_no_slash(self):
        """`123-fix` (no slash at all) resolves to issue 123."""
        links = extract_ticket_links_from_branch_name("123-fix", "org/repo", self.GITHUB)
        assert links == ["https://github.com/org/repo/issues/123"]

    def test_empty_branch_returns_empty(self):
        """An empty branch name yields an empty list."""
        assert extract_ticket_links_from_branch_name("", "org/repo") == []

    def test_none_branch_returns_empty(self):
        """A `None` branch name yields an empty list."""
        assert extract_ticket_links_from_branch_name(None, "org/repo") == []

    def test_no_digits_in_segment_returns_empty(self):
        """`feature/no-issue` contains no number, so the result is an empty list."""
        links = extract_ticket_links_from_branch_name("feature/no-issue", "org/repo", self.GITHUB)
        assert links == []

    def test_base_url_no_trailing_slash(self):
        """A base URL passed with a trailing slash still produces a link without a doubled slash."""
        links = extract_ticket_links_from_branch_name("feature/1-test", "org/repo", "https://github.com/")
        assert links == ["https://github.com/org/repo/issues/1"]

    def test_disable_via_config_returns_empty(self, monkeypatch):
        """With `extract_issue_from_branch` reported as False, the result is an empty list."""
        self._install_fake_settings(monkeypatch, enabled=False, regex="")
        links = extract_ticket_links_from_branch_name("feature/1-test", "org/repo", self.GITHUB)
        assert links == []

    def test_invalid_custom_regex_returns_empty(self, monkeypatch):
        """With an uncompilable `branch_issue_regex` ("["), the result is an empty list."""
        self._install_fake_settings(monkeypatch, enabled=True, regex="[")
        links = extract_ticket_links_from_branch_name("feature/1-test", "org/repo", self.GITHUB)
        assert links == []

    def test_custom_regex_without_capturing_group_falls_back_to_default(self, monkeypatch):
        """A custom regex without a capturing group must not crash; issue 1 is still returned."""
        self._install_fake_settings(monkeypatch, enabled=True, regex=r"\d+")
        links = extract_ticket_links_from_branch_name("feature/1-test", "org/repo", self.GITHUB)
        assert links == ["https://github.com/org/repo/issues/1"]

    def test_empty_repo_path_returns_empty(self):
        """An empty repo path yields an empty list."""
        links = extract_ticket_links_from_branch_name("feature/1-test", "", self.GITHUB)
        assert links == []

    def test_multiple_matches_deduplicated(self):
        """Two numbered segments yield exactly the two corresponding issue URLs (order ignored)."""
        links = extract_ticket_links_from_branch_name("feature/1-test/2-other", "org/repo", self.GITHUB)
        wanted = {
            "https://github.com/org/repo/issues/1",
            "https://github.com/org/repo/issues/2",
        }
        assert set(links) == wanted


================================================
FILE: tests/unittest/test_fetching_sub_issues.py
================================================
# NOTE: the tests below are commented out because they currently perform real API calls, which is wrong for unit tests.


# import unittest
# import asyncio
# from unittest.mock import AsyncMock, patch
# from pr_agent.tools.ticket_pr_compliance_check import extract_tickets, extract_and_cache_pr_tickets
# from pr_agent.git_providers.github_provider import GithubProvider
#
#
# class TestTicketCompliance(unittest.TestCase):
#
#     @patch.object(GithubProvider, 'get_user_description', return_value="Fixes #1 and relates to #2")
#     @patch.object(GithubProvider, '_parse_issue_url', side_effect=lambda url: ("WonOfAKind/KimchiBot", int(url.split('#')[-1])))
#     @patch.object(GithubProvider, 'repo_obj')
#     async def test_extract_tickets(self, mock_repo, mock_parse_issue_url, mock_user_desc):
#         """
#         Test extract_tickets() to ensure it extracts tickets correctly
#         and fetches their content.
#         """
#         github_provider = GithubProvider()
#         github_provider.repo = "WonOfAKind/KimchiBot"
#         github_provider.base_url_html = "https://github.com"
#
#         # Mock issue retrieval
#         mock_issue = AsyncMock()
#         mock_issue.number = 1
#         mock_issue.title = "Sample Issue"
#         mock_issue.body = "This is a test issue body."
#         mock_issue.labels = ["bug", "high priority"]
#
#         # Mock repo object
#         mock_repo.get_issue.return_value = mock_issue
#
#         tickets = await extract_tickets(github_provider)
#
#         # Verify tickets were extracted correctly
#         self.assertIsInstance(tickets, list)
#         self.assertGreater(len(tickets), 0, "Expected at least one ticket!")
#
#         # Verify ticket structure
#         first_ticket = tickets[0]
#         self.assertIn("ticket_id", first_ticket)
#         self.assertIn("ticket_url", first_ticket)
#         self.assertIn("title", first_ticket)
#         self.assertIn("body", first_ticket)
#         self.assertIn("labels", first_ticket)
#
#         print("\n Test Passed: extract_tickets() successfully retrieved ticket info!")
#
#     @patch.object(GithubProvider, 'get_user_description', return_value="Fixes #1 and relates to #2")
#     @patch.object(GithubProvider, '_parse_issue_url', side_effect=lambda url: ("WonOfAKind/KimchiBot", int(url.split('#')[-1])))
#     @patch.object(GithubProvider, 'repo_obj')
#     async def test_extract_and_cache_pr_tickets(self, mock_repo, mock_parse_issue_url, mock_user_desc):
#         """
#         Test extract_and_cache_pr_tickets() to ensure tickets are extracted and cached correctly.
#         """
#         github_provider = GithubProvider()
#         github_provider.repo = "WonOfAKind/KimchiBot"
#         github_provider.base_url_html = "https://github.com"
#
#         vars = {}  # Simulate the dictionary to store results
#
#         # Mock issue retrieval
#         mock_issue = AsyncMock()
#         mock_issue.number = 1
#         mock_issue.title = "Sample Issue"
#         mock_issue.body = "This is a test issue body."
#         mock_issue.labels = ["bug", "high priority"]
#
#         # Mock repo object
#         mock_repo.get_issue.return_value = mock_issue
#
#         # Run function
#         await extract_and_cache_pr_tickets(github_provider, vars)
#
#         # Ensure tickets are cached
#         self.assertIn("related_tickets", vars)
#         self.assertIsInstance(vars["related_tickets"], list)
#         self.assertGreater(len(vars["related_tickets"]), 0, "Expected at least one cached ticket!")
#
#         print("\n Test Passed: extract_and_cache_pr_tickets() successfully cached ticket data!")
#
#     def test_fetch_sub_issues(self):
#         """
#         Test fetch_sub_issues() to ensure sub-issues are correctly retrieved.
#         """
#         github_provider = GithubProvider()
#         issue_url = "https://github.com/WonOfAKind/KimchiBot/issues/1"  # Known issue with sub-issues
#         result = github_provider.fetch_sub_issues(issue_url)
#
#         print("Fetched sub-issues:", result)
#
#         self.assertIsInstance(result, set)  # Ensure result is a set
#         self.assertGreater(len(result), 0, "Expected at least one sub-issue but found none!")
#
#         print("\n Test Passed: fetch_sub_issues() retrieved sub-issues correctly!")
#
#     def test_fetch_sub_issues_with_no_results(self):
#         """
#         Test fetch_sub_issues() to ensure an empty set is returned for an issue with no sub-issues.
#         """
#         github_provider = GithubProvider()
#         issue_url = "https://github.com/qodo-ai/pr-agent/issues/1499"  # Likely non-existent issue
#         result = github_provider.fetch_sub_issues(issue_url)
#
#         print("Fetched sub-issues for non-existent issue:", result)
#
#         self.assertIsInstance(result, set)  # Ensure result is a set
#         self.assertEqual(len(result), 0, "Expected no sub-issues but some were found!")
#
#         print("\n Test Passed: fetch_sub_issues_with_no_results() correctly returned an empty set!")
#
#
# if __name__ == "__main__":
#     asyncio.run(unittest.main())
#
#
#
#
#


================================================
FILE: tests/unittest/test_file_filter.py
================================================
from pr_agent.algo.file_filter import filter_ignored
from pr_agent.config_loader import global_settings


class TestIgnoreFilter:
    """Tests for filter_ignored, driven by the ignore-related entries of global_settings."""

    @staticmethod
    def _make_files(*filenames):
        """
        Build one minimal stand-in object per name.

        Each object is an instance of its own anonymous class and exposes only a
        ``filename`` attribute, so list comparisons in the tests rely on object identity.
        """
        return [type('', (object,), {'filename': name})() for name in filenames]

    def test_no_ignores(self):
        """
        Test no files are ignored when no patterns are specified.
        """
        files = self._make_files('file1.py', 'file2.java', 'file3.cpp', 'file4.py', 'file5.py')
        assert filter_ignored(files) == files, "Expected all files to be returned when no ignore patterns are given."

    def test_glob_ignores(self, monkeypatch):
        """
        Test files are ignored when glob patterns are specified.
        """
        monkeypatch.setattr(global_settings.ignore, 'glob', ['*.py'])

        files = self._make_files('file1.py', 'file2.java', 'file3.cpp', 'file4.py', 'file5.py')
        expected = [
            files[1],
            files[2]
        ]

        filtered_files = filter_ignored(files)
        assert filtered_files == expected, f"Expected {[file.filename for file in expected]}, but got {[file.filename for file in filtered_files]}."

    def test_regex_ignores(self, monkeypatch):
        """
        Test files are ignored when regex patterns are specified.
        """
        # Raw string: "\." is not a valid escape sequence in a regular string literal.
        monkeypatch.setattr(global_settings.ignore, 'regex', [r'^file[2-4]\..*$'])

        files = self._make_files('file1.py', 'file2.java', 'file3.cpp', 'file4.py', 'file5.py')
        expected = [
            files[0],
            files[4]
        ]

        filtered_files = filter_ignored(files)
        assert filtered_files == expected, f"Expected {[file.filename for file in expected]}, but got {[file.filename for file in filtered_files]}."

    def test_invalid_regex(self, monkeypatch):
        """
        Test invalid patterns are quietly ignored.
        """
        # The first pattern does not compile; the second (raw string) is the same valid one used above.
        monkeypatch.setattr(global_settings.ignore, 'regex', ['(((||', r'^file[2-4]\..*$'])

        files = self._make_files('file1.py', 'file2.java', 'file3.cpp', 'file4.py', 'file5.py')
        expected = [
            files[0],
            files[4]
        ]

        filtered_files = filter_ignored(files)
        assert filtered_files == expected, f"Expected {[file.filename for file in expected]}, but got {[file.filename for file in filtered_files]}."

    def test_language_framework_ignores(self, monkeypatch):
        """
        Test files are ignored based on language/framework mapping (e.g., protobuf).
        """
        monkeypatch.setattr(global_settings.config, 'ignore_language_framework', ['protobuf', 'go_gen'])

        files = self._make_files(
            'main.go',
            'dir1/service.pb.go',
            'dir1/dir/data_pb2.py',
            'file.py',
            'dir2/file_gen.go',
            'file.generated.go',
        )
        expected = [
            files[0],
            files[3]
        ]

        filtered = filter_ignored(files)
        assert filtered == expected, (
            f"Expected {[f.filename for f in expected]}, "
            f"but got {[f.filename for f in filtered]}"
        )

    def test_skip_invalid_ignore_language_framework(self, monkeypatch):
        """
        Test skipping of generated code filtering when ignore_language_framework is not a list
        """
        monkeypatch.setattr(global_settings.config, 'ignore_language_framework', 'protobuf')

        files = self._make_files('main.go', 'file.py', 'dir1/service.pb.go', 'file_pb2.py')
        # Nothing is expected to be filtered out, including the protobuf-style names.
        expected = [
            files[0],
            files[1],
            files[2],
            files[3]
        ]

        filtered = filter_ignored(files)
        assert filtered == expected, (
            f"Expected {[f.filename for f in expected]}, "
            f"but got {[f.filename for f in filtered]}"
        )


================================================
FILE: tests/unittest/test_find_line_number_of_relevant_line_in_file.py
================================================
# Generated by CodiumAI

from pr_agent.algo.types import FilePatchInfo
from pr_agent.algo.utils import find_line_number_of_relevant_line_in_file


class TestFindLineNumberOfRelevantLineInFile:
    """Expectations for find_line_number_of_relevant_line_in_file on small single-hunk patches."""

    def test_relevant_line_found_in_patch(self):
        """An exact match on an added line is expected to yield (3, 2)."""
        patch_info = FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\n-line1\n+line2\n+relevant_line\n', filename='file1')
        # Expected tuple is (position in patch, absolute position in new file).
        located = find_line_number_of_relevant_line_in_file([patch_info], 'file1', 'relevant_line')
        assert located == (3, 2)

    def test_similar_line_found_using_difflib(self):
        """A near match (extra trailing space, meant to exercise similarity matching) is expected to yield (2, 1)."""
        patch_info = FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\n-line1\n+relevant_line in file similar match\n', filename='file1')
        # Note the space at the end: the query is deliberately not identical to the patch line.
        almost_matching_line = '+relevant_line in file similar match '
        located = find_line_number_of_relevant_line_in_file([patch_info], 'file1', almost_matching_line)
        assert located == (2, 1)

    def test_relevant_line_not_found(self):
        """A line that is neither present nor similar to anything in the patch is expected to yield (-1, -1)."""
        patch_info = FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\n-line1\n+relevant_line\n', filename='file1')
        located = find_line_number_of_relevant_line_in_file([patch_info], 'file1', 'not_found')
        assert located == (-1, -1)

    def test_relevant_file_not_found(self):
        """When no patch carries the requested filename the expected result is (-1, -1)."""
        # The only patch is registered under 'file2', while 'file1' is requested.
        patch_info = FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\n-line1\n+relevant_line\n', filename='file2')
        located = find_line_number_of_relevant_line_in_file([patch_info], 'file1', 'relevant_line')
        assert located == (-1, -1)

    def test_empty_relevant_line(self):
        """An empty relevant line is expected to yield (0, 0)."""
        patch_info = FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\n-line1\n+relevant_line\n', filename='file1')
        located = find_line_number_of_relevant_line_in_file([patch_info], 'file1', '')
        assert located == (0, 0)

    def test_relevant_line_found_but_deleted(self):
        """A line that appears in the patch only as a deletion is expected to yield (-1, -1)."""
        patch_info = FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,2 +1,1 @@\n-line1\n-relevant_line\n', filename='file1')
        located = find_line_number_of_relevant_line_in_file([patch_info], 'file1', 'relevant_line')
        assert located == (-1, -1)


================================================
FILE: tests/unittest/test_fix_json_escape_char.py
================================================
from pr_agent.algo.utils import fix_json_escape_char


class TestFixJsonEscapeChar:
    """Expectations for fix_json_escape_char on valid JSON and on JSON holding raw control characters."""

    def test_valid_json(self):
        """Already-valid JSON text is expected to come back as the equivalent parsed dict."""
        parsed = fix_json_escape_char('{"a": 1, "b": "ok"}')
        assert parsed == {"a": 1, "b": "ok"}

    def test_single_control_char(self):
        """One raw control character inside a string value is expected to come back as a space."""
        parsed = fix_json_escape_char('{"msg": "hel\x01lo"}')
        assert parsed == {"msg": "hel lo"}

    def test_multiple_control_chars(self):
        """Several raw control characters in one value are each expected to come back as a space."""
        parsed = fix_json_escape_char('{"x": "A\x02B\x03C"}')
        assert parsed == {"x": "A B C"}


================================================
FILE: tests/unittest/test_fix_output.py
================================================
# Generated by CodiumAI

from pr_agent.algo.utils import try_fix_json


class TestTryFixJson:
    """try_fix_json on review JSON that was cut off somewhere inside the second code suggestion."""

    @staticmethod
    def _first_suggestion_only():
        """Build the result every case expects: the analysis plus only the first, complete suggestion."""
        return {
            'PR Analysis': {'Main theme': 'xxx', 'Type of PR': 'Bug fix'},
            'PR Feedback': {
                'General PR suggestions': '..., `xxx`...',
                'Code suggestions': [
                    {'relevant file': 'xxx.py', 'suggestion content': 'xxx [important]'},
                ],
            },
        }

    def test_incomplete_code_suggestions(self):
        """Input stops partway through the second suggestion's content."""
        truncated = '{"PR Analysis": {"Main theme": "xxx", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...'  # noqa: E501
        assert try_fix_json(truncated) == self._first_suggestion_only()

    def test_incomplete_code_suggestions_new_line(self):
        """Same truncation, with a newline and tab before the comma separating the suggestions."""
        truncated = '{"PR Analysis": {"Main theme": "xxx", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n\t, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...'  # noqa: E501
        assert try_fix_json(truncated) == self._first_suggestion_only()

    def test_incomplete_code_suggestions_many_close_brackets(self):
        """The truncated tail itself contains stray closing braces, brackets and commas."""
        truncated = '{"PR Analysis": {"Main theme": "xxx", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy }, [}\n ,incomp.}  ,..'  # noqa: E501
        assert try_fix_json(truncated) == self._first_suggestion_only()

    def test_incomplete_code_suggestions_relevant_file(self):
        """Input stops inside the second suggestion's "relevant file" value."""
        truncated = '{"PR Analysis": {"Main theme": "xxx", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.p'  # noqa: E501
        assert try_fix_json(truncated) == self._first_suggestion_only()


================================================
FILE: tests/unittest/test_fresh_vars_functionality.py
================================================
"""
Comprehensive unit tests for Dynaconf fresh_vars functionality.

These tests verify that the fresh_vars feature works correctly with the custom_merge_loader,
particularly for the GitLab credentials use case where values should be reloaded from disk
on each access rather than being cached.

The tests are designed to detect if fresh_vars is broken due to custom loader changes,
such as those introduced in https://github.com/qodo-ai/pr-agent/pull/2087.
"""

import os
import tempfile
from pathlib import Path
from unittest.mock import patch

import pytest
from dynaconf import Dynaconf

# Import get_settings at module level to complete the import chain and avoid circular import issues
# This ensures pr_agent.config_loader is fully loaded before custom_merge_loader is used in tests
from pr_agent.config_loader import get_settings  # noqa: F401


# Module-level helper function
def create_dynaconf_with_custom_loader(temp_dir, secrets_file):
    """
    Build a Dynaconf object that reads ``secrets_file`` through pr_agent's custom merge loader.

    The options are intended to mirror the production setup in config_loader.py:
    - no core loaders
    - pr_agent.custom_merge_loader followed by Dynaconf's env_loader
    - merging enabled

    fresh_vars is deliberately not passed here; callers are expected to set the
    FRESH_VARS_FOR_DYNACONF environment variable before calling this helper.

    Args:
        temp_dir: Directory used as Dynaconf's root_path
        secrets_file: Path of the settings file to load (converted with str())

    Returns:
        The configured Dynaconf instance
    """
    dynaconf_options = {
        "core_loaders": [],
        "loaders": ["pr_agent.custom_merge_loader", "dynaconf.loaders.env_loader"],
        "root_path": temp_dir,
        "merge_enabled": True,
        "envvar_prefix": False,
        "load_dotenv": False,
        "settings_files": [str(secrets_file)],
    }
    return Dynaconf(**dynaconf_options)


class TestFreshVarsGitLabScenario:
    """
    Test fresh_vars functionality for the GitLab credentials use case.

    This class tests the specific scenario where:
    - FRESH_VARS_FOR_DYNACONF='["GITLAB"]' is set
    - .secrets.toml contains gitlab.personal_access_token and gitlab.shared_secret
    - Values should be reloaded from disk on each access (not cached)
    """

    def setup_method(self):
        """Set up temporary directory and files for each test."""
        self.temp_dir = tempfile.mkdtemp()
        self.secrets_file = Path(self.temp_dir) / ".secrets.toml"

    def teardown_method(self):
        """Clean up temporary files after each test."""
        import shutil

        if hasattr(self, "temp_dir") and Path(self.temp_dir).exists():
            shutil.rmtree(self.temp_dir)

    def create_secrets_toml(self, personal_access_token="initial_token", shared_secret="initial_secret"):
        """
        Create (or overwrite) the .secrets.toml file with GitLab credentials.

        Args:
            personal_access_token: The GitLab personal access token value
            shared_secret: The GitLab shared secret value
        """
        content = f"""[gitlab]
personal_access_token = "{personal_access_token}"
shared_secret = "{shared_secret}"
"""
        self.secrets_file.write_text(content)

    def test_gitlab_personal_access_token_reload(self):
        """
        Test that gitlab.personal_access_token is reloaded when marked as fresh.

        This is the critical test for the user's use case. It verifies that:
        1. Initial value is loaded correctly
        2. After modifying the file, the new value is returned (not cached)
        3. This works with the custom_merge_loader
        """
        # Create initial secrets file
        self.create_secrets_toml(personal_access_token="token_v1", shared_secret="secret_v1")

        # Set FRESH_VARS_FOR_DYNACONF environment variable (the only way to configure fresh_vars in pr-agent).
        # Every access below stays inside this block so that the reloading access runs under
        # the same environment as the first one, like the sibling test in this class.
        with patch.dict(os.environ, {"FRESH_VARS_FOR_DYNACONF": '["GITLAB"]'}):
            # Create Dynaconf with GITLAB marked as fresh via env var
            settings = create_dynaconf_with_custom_loader(self.temp_dir, self.secrets_file)

            # First access - should return initial value
            first_token = settings.GITLAB.PERSONAL_ACCESS_TOKEN
            assert first_token == "token_v1", "Initial personal_access_token should be 'token_v1'"

            # Modify the secrets file
            self.create_secrets_toml(personal_access_token="token_v2_updated", shared_secret="secret_v1")

            # Second access - should return NEW value (not cached)
            second_token = settings.GITLAB.PERSONAL_ACCESS_TOKEN
            assert second_token == "token_v2_updated", (
                "After file modification, personal_access_token should be reloaded to 'token_v2_updated'"
            )

            # Verify the values are different (fresh_vars working)
            assert first_token != second_token, "fresh_vars should cause values to be reloaded, not cached"

    def test_gitlab_multiple_fields_reload(self):
        """
        Test that both gitlab fields reload together when GITLAB is marked as fresh.

        This verifies that fresh_vars works correctly when multiple fields
        in the same section are modified simultaneously.
        """
        # Create initial secrets file
        self.create_secrets_toml(personal_access_token="token_v1", shared_secret="secret_v1")

        # Set FRESH_VARS_FOR_DYNACONF environment variable
        with patch.dict(os.environ, {"FRESH_VARS_FOR_DYNACONF": '["GITLAB"]'}):
            # Create Dynaconf with GITLAB marked as fresh via env var
            settings = create_dynaconf_with_custom_loader(self.temp_dir, self.secrets_file)

            # First access - both fields
            first_token = settings.GITLAB.PERSONAL_ACCESS_TOKEN
            first_secret = settings.GITLAB.SHARED_SECRET
            assert first_token == "token_v1"
            assert first_secret == "secret_v1"

            # Modify both fields in the secrets file
            self.create_secrets_toml(
                personal_access_token="token_v2_both_updated", shared_secret="secret_v2_both_updated"
            )

            # Second access - both fields should be updated
            second_token = settings.GITLAB.PERSONAL_ACCESS_TOKEN
            second_secret = settings.GITLAB.SHARED_SECRET

            assert second_token == "token_v2_both_updated", "personal_access_token should be reloaded"
            assert second_secret == "secret_v2_both_updated", "shared_secret should be reloaded"

            # Verify both fields were reloaded
            assert first_token != second_token, "personal_access_token should not be cached"
            assert first_secret != second_secret, "shared_secret should not be cached"


class TestFreshVarsCustomLoaderIntegration:
    """
    Test fresh_vars integration with custom_merge_loader.

    These tests verify that fresh_vars works correctly when using the
    custom_merge_loader instead of Dynaconf's default core loaders.
    """

    def setup_method(self):
        """Set up temporary directory and files for each test."""
        self.temp_dir = tempfile.mkdtemp()
        self.secrets_file = Path(self.temp_dir) / ".secrets.toml"

    def teardown_method(self):
        """Clean up temporary files after each test."""
        import shutil

        if hasattr(self, "temp_dir") and Path(self.temp_dir).exists():
            shutil.rmtree(self.temp_dir)

    def create_secrets_toml(self, personal_access_token="initial_token", shared_secret="initial_secret"):
        """Create (or overwrite) the .secrets.toml file with GitLab credentials."""
        content = f"""[gitlab]
personal_access_token = "{personal_access_token}"
shared_secret = "{shared_secret}"
"""
        self.secrets_file.write_text(content)

    def test_fresh_vars_without_core_loaders(self):
        """
        Critical test: Verify fresh_vars works when core_loaders are disabled.

        This test detects if the bug exists where fresh_vars stops working
        when core_loaders=[] is set. This is the key issue that may have been
        introduced by the custom_merge_loader changes.

        Expected behavior:
        - If fresh_vars works: second_value != first_value
        - If fresh_vars is broken: second_value == first_value (cached)
        """
        # Create initial secrets file
        self.create_secrets_toml(personal_access_token="token_before_bug_test")

        # Set FRESH_VARS_FOR_DYNACONF environment variable.
        # Every access below stays inside this block so that the reloading access runs under
        # the same environment as the first one, like the sibling test in this class.
        with patch.dict(os.environ, {"FRESH_VARS_FOR_DYNACONF": '["GITLAB"]'}):
            # Create Dynaconf WITHOUT core loaders but WITH fresh_vars via env var
            settings = create_dynaconf_with_custom_loader(self.temp_dir, self.secrets_file)

            # First access
            first_value = settings.GITLAB.PERSONAL_ACCESS_TOKEN
            assert first_value == "token_before_bug_test", "Initial value should be loaded correctly"

            # Modify the file
            self.create_secrets_toml(personal_access_token="token_after_bug_test")

            # Second access - THIS IS THE CRITICAL CHECK
            second_value = settings.GITLAB.PERSONAL_ACCESS_TOKEN

            # If this assertion fails, fresh_vars is broken with custom_merge_loader
            assert second_value == "token_after_bug_test", (
                "CRITICAL: fresh_vars should reload the value even with core_loaders=[]"
            )

            assert first_value != second_value, "CRITICAL: Values should be different, indicating fresh_vars is working"

    def test_custom_loader_respects_fresh_vars(self):
        """
        Test that custom_merge_loader respects the fresh_vars configuration.

        Verifies that when a section is marked as fresh, the custom loader
        doesn't cache values from that section, while a section that is not
        marked as fresh keeps returning its originally loaded value.
        """
        # Create initial secrets file with multiple sections
        content = """[gitlab]
personal_access_token = "gitlab_token_v1"

[github]
user_token = "github_token_v1"
"""
        self.secrets_file.write_text(content)

        # Set FRESH_VARS_FOR_DYNACONF environment variable (only GITLAB)
        with patch.dict(os.environ, {"FRESH_VARS_FOR_DYNACONF": '["GITLAB"]'}):
            # Create Dynaconf with only GITLAB marked as fresh via env var
            settings = create_dynaconf_with_custom_loader(self.temp_dir, self.secrets_file)

            # Access both sections
            gitlab_token_1 = settings.GITLAB.PERSONAL_ACCESS_TOKEN
            github_token_1 = settings.GITHUB.USER_TOKEN

            # Modify both sections
            content = """[gitlab]
personal_access_token = "gitlab_token_v2"

[github]
user_token = "github_token_v2"
"""
            self.secrets_file.write_text(content)

            # Access again
            gitlab_token_2 = settings.GITLAB.PERSONAL_ACCESS_TOKEN
            github_token_2 = settings.GITHUB.USER_TOKEN

            # GITLAB should be reloaded (marked as fresh)
            assert gitlab_token_2 == "gitlab_token_v2", "GITLAB section should be reloaded (marked as fresh)"
            assert gitlab_token_1 != gitlab_token_2, "GITLAB values should not be cached"

            # GITHUB should be cached (not marked as fresh)
            assert github_token_2 == "github_token_v1", "GITHUB section should be cached (not marked as fresh)"
            assert github_token_1 == github_token_2, "GITHUB values should be cached"


class TestFreshVarsBasicFunctionality:
    """
    Test basic fresh_vars functionality and edge cases.

    These tests verify fundamental fresh_vars behavior and ensure
    the feature works as expected in various scenarios.
    """

    def setup_method(self):
        """Set up temporary directory and files for each test."""
        self.temp_dir = tempfile.mkdtemp()
        self.secrets_file = Path(self.temp_dir) / ".secrets.toml"

    def teardown_method(self):
        """Clean up temporary files after each test."""
        import shutil

        if hasattr(self, "temp_dir") and Path(self.temp_dir).exists():
            shutil.rmtree(self.temp_dir)

    def create_secrets_toml(self, personal_access_token="initial_token"):
        """Create a .secrets.toml file with GitLab credentials."""
        content = f"""[gitlab]
personal_access_token = "{personal_access_token}"
"""
        self.secrets_file.write_text(content)

    def test_gitlab_credentials_not_cached_when_fresh(self):
        """
        Test that GitLab credentials are not cached when marked as fresh.

        This verifies the core requirement: when GITLAB is in fresh_vars,
        accessing the credentials multiple times should reload from disk
        each time, not return a cached value.
        """
        # Create initial secrets file
        self.create_secrets_toml(personal_access_token="no_cache_v1")

        # Set FRESH_VARS_FOR_DYNACONF environment variable
        with patch.dict(os.environ, {"FRESH_VARS_FOR_DYNACONF": '["GITLAB"]'}):
            # Create Dynaconf with GITLAB marked as fresh via env var
            settings = create_dynaconf_with_custom_loader(self.temp_dir, self.secrets_file)

            # Access the token multiple times before modification
            access_1 = settings.GITLAB.PERSONAL_ACCESS_TOKEN
            access_2 = settings.GITLAB.PERSONAL_ACCESS_TOKEN
            access_3 = settings.GITLAB.PERSONAL_ACCESS_TOKEN

        # All should return the same value (file hasn't changed)
        assert access_1 == access_2 == access_3 == "no_cache_v1", (
            "Multiple accesses before modification should return same value"
        )

        # Modify the file
        self.create_secrets_toml(personal_access_token="no_cache_v2")

        # Access again - should get new value immediately
        access_4 = settings.GITLAB.PERSONAL_ACCESS_TOKEN
        assert access_4 == "no_cache_v2", "First access after modification should return new value"

        # Verify no caching occurred
        assert access_1 != access_4, "Value should change after file modification (no caching)"

        # Modify again
        self.create_secrets_toml(personal_access_token="no_cache_v3")

        # Access again - should get newest value
        access_5 = settings.GITLAB.PERSONAL_ACCESS_TOKEN
        assert access_5 == "no_cache_v3", "Second modification should also be detected"

        # Verify the progression
        assert access_1 != access_4 != access_5, "Each modification should result in a different value (no caching)"

    def test_fresh_vars_works_with_default_loaders(self):
        """
        Test that fresh_vars works correctly with Dynaconf's default core loaders.

        This is a control test to prove that fresh_vars functionality works
        as expected when using the standard Dynaconf configuration (with core_loaders).
        This helps isolate the bug to the custom_merge_loader configuration.
        """
        # Create initial secrets file
        self.create_secrets_toml(personal_access_token="default_v1")

        # Create Dynaconf with DEFAULT loaders (not custom_merge_loader)
        settings = Dynaconf(
            # Use default core_loaders (don't disable them)
            root_path=self.temp_dir,
            merge_enabled=True,
            envvar_prefix=False,
            load_dotenv=False,
            settings_files=[str(self.secrets_file)],
            fresh_vars=["GITLAB"],
        )

        # First access
        first_value = settings.GITLAB.PERSONAL_ACCESS_TOKEN
        assert first_value == "default_v1"

        # Modify file
        self.create_secrets_toml(personal_access_token="default_v2")

        # Second access - should be reloaded with default loaders
        second_value = settings.GITLAB.PERSONAL_ACCESS_TOKEN
        assert second_value == "default_v2", (
            "With default loaders, fresh_vars SHOULD work correctly. "
            "If this test fails, the issue is not specific to custom_merge_loader."
        )

        assert first_value != second_value, "Values should be different when using default loaders with fresh_vars"


# Allow running this test module directly: hands this file to pytest in verbose mode.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])


================================================
FILE: tests/unittest/test_get_max_tokens.py
================================================
import pytest

import pr_agent.algo.utils as utils
from pr_agent.algo.utils import MAX_TOKENS, get_max_tokens


class TestGetMaxTokens:
    """Checks ``get_max_tokens`` for registered models, custom models and a configured token cap."""

    @staticmethod
    def _install_settings(monkeypatch, custom_model_max_tokens=0, max_model_tokens=0):
        """Replace ``utils.get_settings`` with a stub exposing only the two ``config`` fields given here."""
        config_stub = type('', (), {
            'custom_model_max_tokens': custom_model_max_tokens,
            'max_model_tokens': max_model_tokens,
        })()
        settings_stub = type('', (), {'config': config_stub})()
        monkeypatch.setattr(utils, "get_settings", lambda: settings_stub)

    # A model registered in MAX_TOKENS resolves to its registered value
    def test_model_max_tokens(self, monkeypatch):
        self._install_settings(monkeypatch)

        model = "gpt-3.5-turbo"

        assert get_max_tokens(model) == MAX_TOKENS[model]

    @pytest.mark.parametrize("model", ["gpt-5.4", "gpt-5.4-2026-03-05"])
    def test_gpt54_model_max_tokens(self, monkeypatch, model):
        self._install_settings(monkeypatch)

        assert get_max_tokens(model) == 272000

    # A model name used with custom_model_max_tokens set resolves to that custom value
    def test_model_has_custom(self, monkeypatch):
        # max_model_tokens = 0 means no limit
        self._install_settings(monkeypatch, custom_model_max_tokens=5000, max_model_tokens=0)

        assert get_max_tokens("custom-model") == 5000

    @pytest.mark.parametrize("model", [
        "gpt-5.1-codex",
        "gpt-5.2-codex",
        "gpt-5.3-codex",
    ])
    def test_gpt_codex_models_max_tokens(self, monkeypatch, model):
        self._install_settings(monkeypatch)

        assert get_max_tokens(model) == MAX_TOKENS[model]

    # With no custom value configured, the "custom-model" name is expected to raise
    def test_model_not_max_tokens_and_not_has_custom(self, monkeypatch):
        self._install_settings(monkeypatch)

        with pytest.raises(Exception):
            get_max_tokens("custom-model")

    # The configured max_model_tokens (10000) is the expected result for a registered model
    def test_model_max_tokens_with__limit(self, monkeypatch):
        self._install_settings(monkeypatch, max_model_tokens=10000)

        assert get_max_tokens("gpt-3.5-turbo") == 10000

    @pytest.mark.parametrize("model", [
        "gemini/gemini-3-flash-preview",
        "vertex_ai/gemini-3-flash-preview",
        "gemini/gemini-3-pro-preview",
        "vertex_ai/gemini-3-pro-preview",
        "gemini/gemini-3.1-pro-preview",
        "vertex_ai/gemini-3.1-pro-preview",
    ])
    def test_gemini_3_and_3_1_pro_preview(self, monkeypatch, model):
        self._install_settings(monkeypatch)

        assert get_max_tokens(model) == 1048576

    @pytest.mark.parametrize(
        "model",
        [
            "anthropic/claude-opus-4-6",
            "claude-opus-4-6",
            "vertex_ai/claude-opus-4-6",
            "bedrock/anthropic.claude-opus-4-6-v1:0",
            "bedrock/global.anthropic.claude-opus-4-6-v1:0",
            "bedrock/us.anthropic.claude-opus-4-6-v1:0",
        ],
    )
    def test_claude_opus_4_6_model_max_tokens(self, monkeypatch, model):
        self._install_settings(monkeypatch)

        assert get_max_tokens(model) == 200000

    @pytest.mark.parametrize(
        "model",
        [
            "anthropic/claude-sonnet-4-6",
            "claude-sonnet-4-6",
            "vertex_ai/claude-sonnet-4-6",
            "bedrock/anthropic.claude-sonnet-4-6",
            "bedrock/global.anthropic.claude-sonnet-4-6",
            "bedrock/us.anthropic.claude-sonnet-4-6",
            "bedrock/au.anthropic.claude-sonnet-4-6",
            "bedrock/eu.anthropic.claude-sonnet-4-6",
            "bedrock/jp.anthropic.claude-sonnet-4-6",
        ],
    )
    def test_claude_sonnet_4_6_model_max_tokens(self, monkeypatch, model):
        self._install_settings(monkeypatch)

        assert get_max_tokens(model) == 200000


================================================
FILE: tests/unittest/test_gitea_provider.py
================================================
from io import BytesIO
from unittest.mock import MagicMock, patch


class TestGiteaProvider:
    """Checks the request path and ``auth_settings`` that ``RepoApi`` passes to ``ApiClient.call_api``."""

    @patch('pr_agent.git_providers.gitea_provider.get_settings')
    @patch('pr_agent.git_providers.gitea_provider.giteapy.ApiClient')
    def test_gitea_provider_auth_header(self, mock_api_client_cls, mock_get_settings):
        """
        Call five ``RepoApi`` methods against a mocked client and inspect each ``call_api`` invocation.

        For every method the test asserts the path passed as the first positional
        argument and that ``auth_settings`` equals ``['AuthorizationHeaderToken']``;
        the first case additionally asserts that no ``token=`` appears in the path.
        """
        # Setup settings
        settings = MagicMock()
        settings.get.side_effect = lambda k, d=None: {
            'GITEA.URL': 'https://gitea.example.com',
            'GITEA.PERSONAL_ACCESS_TOKEN': 'test-token',
            'GITEA.REPO_SETTING': None,
            'GITEA.SKIP_SSL_VERIFICATION': False,
            'GITEA.SSL_CA_CERT': None
        }.get(k, d)
        mock_get_settings.return_value = settings

        # Setup ApiClient mock
        mock_api_client = mock_api_client_cls.return_value
        # Mock configuration object on client
        mock_api_client.configuration.api_key = {'Authorization': 'token test-token'}

        # Mock responses for calls made during initialization
        def call_api_side_effect(path, method, **kwargs):
            mock_resp = MagicMock()
            if 'files' in path: # get_change_file_pull_request
                mock_resp.data = BytesIO(b'[]')
                return mock_resp
            if 'commits' in path:
                mock_resp.data = BytesIO(b'[]')
                return mock_resp

            # Default fallback
            mock_resp.data = BytesIO(b'{}')
            return mock_resp

        mock_api_client.call_api.side_effect = call_api_side_effect

        # Imported inside the test so the import happens while the patches above are active.
        from pr_agent.git_providers.gitea_provider import RepoApi

        client = mock_api_client
        repo_api = RepoApi(client)

        # Now test methods independently
        # NOTE(review): reset_mock() keeps side_effect unless side_effect=True is passed, so
        # call_api_side_effect presumably still answers every call below and the
        # call_api.return_value assignments are likely shadowed. The assertions only look at
        # call arguments, so they are unaffected either way - confirm intent.

        # 1. get_change_file_pull_request
        mock_api_client.reset_mock()
        mock_resp = MagicMock()
        mock_resp.data = BytesIO(b'[]')
        mock_api_client.call_api.return_value = mock_resp

        repo_api.get_change_file_pull_request('owner', 'repo', 123)

        args, kwargs = mock_api_client.call_api.call_args
        assert '/repos/owner/repo/pulls/123/files' in args[0]
        assert kwargs.get('auth_settings') == ['AuthorizationHeaderToken']
        # The token must not be passed in the request path.
        assert 'token=' not in args[0]

        # 2. get_pull_request_diff
        mock_api_client.reset_mock()
        mock_resp = MagicMock()
        mock_resp.data = BytesIO(b'diff content')
        mock_api_client.call_api.return_value = mock_resp

        repo_api.get_pull_request_diff('owner', 'repo', 123)

        args, kwargs = mock_api_client.call_api.call_args
        assert args[0] == '/repos/owner/repo/pulls/123.diff'
        assert kwargs.get('auth_settings') == ['AuthorizationHeaderToken']

        # 3. get_languages (reuses the mock_resp object created in step 2)
        mock_api_client.reset_mock()
        mock_resp.data = BytesIO(b'{"Python": 100}')
        mock_api_client.call_api.return_value = mock_resp

        repo_api.get_languages('owner', 'repo')

        args, kwargs = mock_api_client.call_api.call_args
        assert args[0] == '/repos/owner/repo/languages'
        assert kwargs.get('auth_settings') == ['AuthorizationHeaderToken']

        # 4. get_file_content
        mock_api_client.reset_mock()
        mock_resp.data = BytesIO(b'content')
        mock_api_client.call_api.return_value = mock_resp

        repo_api.get_file_content('owner', 'repo', 'sha1', 'file.txt')

        args, kwargs = mock_api_client.call_api.call_args
        assert args[0] == '/repos/owner/repo/raw/file.txt'
        # The commit sha is expected as a `ref` query parameter rather than in the path.
        assert kwargs.get('query_params') == [('ref', 'sha1')]
        assert kwargs.get('auth_settings') == ['AuthorizationHeaderToken']

        # 5. get_pr_commits
        mock_api_client.reset_mock()
        mock_resp.data = BytesIO(b'[]')
        mock_api_client.call_api.return_value = mock_resp

        repo_api.get_pr_commits('owner', 'repo', 123)

        args, kwargs = mock_api_client.call_api.call_args
        assert args[0] == '/repos/owner/repo/pulls/123/commits'
        assert kwargs.get('auth_settings') == ['AuthorizationHeaderToken']


================================================
FILE: tests/unittest/test_github_action_output.py
================================================
import json
import os

from pr_agent.algo.utils import get_settings, github_action_output


class TestGitHubOutput:
    """Exercises ``github_action_output`` with the output option enabled, disabled, untouched, and with invalid data."""

    def test_github_action_output_enabled(self, monkeypatch, tmp_path):
        """With ENABLE_OUTPUT set to True, a ``key=<json>`` entry is expected in the GITHUB_OUTPUT file."""
        output_file = tmp_path / 'output'
        get_settings().set('GITHUB_ACTION_CONFIG.ENABLE_OUTPUT', True)
        monkeypatch.setenv('GITHUB_OUTPUT', str(output_file))
        payload = {'key1': {'value1': 1, 'value2': 2}}
        key_name = 'key1'

        github_action_output(payload, key_name)

        with open(str(output_file), 'r') as handle:
            written = handle.read()

        # The file content is split on '=': name on the left, JSON value on the right.
        pieces = written.split('=')
        written_key = pieces[0]
        written_value = json.loads(pieces[1])

        assert written_key == key_name
        assert written_value == payload[key_name]

    def test_github_action_output_disabled(self, monkeypatch, tmp_path):
        """With ENABLE_OUTPUT set to False, no output file is expected to be created."""
        output_file = tmp_path / 'output'
        get_settings().set('GITHUB_ACTION_CONFIG.ENABLE_OUTPUT', False)
        monkeypatch.setenv('GITHUB_OUTPUT', str(output_file))
        payload = {'key1': {'value1': 1, 'value2': 2}}

        github_action_output(payload, 'key1')

        assert not os.path.exists(str(output_file))

    def test_github_action_output_notset(self, monkeypatch, tmp_path):
        """Without touching ENABLE_OUTPUT in this test, no output file is expected to be created."""
        # NOTE(review): ENABLE_OUTPUT is not set here; presumably this relies on the settings
        # object not carrying a True value left by another test - confirm test isolation.
        output_file = tmp_path / 'output'
        monkeypatch.setenv('GITHUB_OUTPUT', str(output_file))
        payload = {'key1': {'value1': 1, 'value2': 2}}

        github_action_output(payload, 'key1')

        assert not os.path.exists(str(output_file))

    def test_github_action_output_error_case(self, monkeypatch, tmp_path):
        """Passing ``None`` as the output data is expected to leave no output file behind."""
        # NOTE(review): ENABLE_OUTPUT is not set here either, so whether the invalid-data
        # path is actually reached depends on the current settings value - confirm.
        output_file = tmp_path / 'output'
        monkeypatch.setenv('GITHUB_OUTPUT', str(output_file))
        invalid_payload = None  # invalid data

        github_action_output(invalid_payload, 'key1')

        assert not os.path.exists(str(output_file))


================================================
FILE: tests/unittest/test_gitlab_provider.py
================================================
from unittest.mock import MagicMock, patch

import pytest
from gitlab import Gitlab
from gitlab.exceptions import GitlabGetError
from gitlab.v4.objects import Project, ProjectFile

from pr_agent.git_providers.gitlab_provider import GitLabProvider


class TestGitLabProvider:
    """Test suite for GitLab provider functionality.

    All tests run against a ``GitLabProvider`` whose GitLab client and project
    are ``MagicMock`` objects, so no network access takes place.
    """

    @pytest.fixture
    def mock_gitlab_client(self):
        """Stand-in for the ``gitlab.Gitlab`` client."""
        client = MagicMock()
        return client

    @pytest.fixture
    def mock_project(self):
        """Stand-in for the project object returned by ``client.projects.get``."""
        project = MagicMock()
        return project

    @pytest.fixture
    def gitlab_provider(self, mock_gitlab_client, mock_project):
        """Build a ``GitLabProvider`` wired to the mocked client and project.

        ``gitlab.Gitlab`` and ``get_settings`` are patched only while the provider
        is constructed; afterwards the mocked client and a fixed project id are
        assigned on the instance directly.
        """
        with patch('pr_agent.git_providers.gitlab_provider.gitlab.Gitlab', return_value=mock_gitlab_client), \
             patch('pr_agent.git_providers.gitlab_provider.get_settings') as mock_settings:

            mock_settings.return_value.get.side_effect = lambda key, default=None: {
                "GITLAB.URL": "https://gitlab.com",
                "GITLAB.PERSONAL_ACCESS_TOKEN": "fake_token"
            }.get(key, default)

            mock_gitlab_client.projects.get.return_value = mock_project
            provider = GitLabProvider("https://gitlab.com/test/repo/-/merge_requests/1")
            provider.gl = mock_gitlab_client
            provider.id_project = "test/repo"
            return provider

    def test_get_pr_file_content_success(self, gitlab_provider, mock_project):
        """A decoded ``str`` from the project file is returned as-is."""
        mock_file = MagicMock(ProjectFile)
        mock_file.decode.return_value = "# Changelog\n\n## v1.0.0\n- Initial release"
        mock_project.files.get.return_value = mock_file

        content = gitlab_provider.get_pr_file_content("CHANGELOG.md", "main")

        assert content == "# Changelog\n\n## v1.0.0\n- Initial release"
        mock_project.files.get.assert_called_once_with("CHANGELOG.md", "main")
        mock_file.decode.assert_called_once()

    def test_get_pr_file_content_with_bytes(self, gitlab_provider, mock_project):
        """A decoded ``bytes`` value is returned as the corresponding ``str``."""
        mock_file = MagicMock(ProjectFile)
        mock_file.decode.return_value = b"# Changelog\n\n## v1.0.0\n- Initial release"
        mock_project.files.get.return_value = mock_file

        content = gitlab_provider.get_pr_file_content("CHANGELOG.md", "main")

        assert content == "# Changelog\n\n## v1.0.0\n- Initial release"
        mock_project.files.get.assert_called_once_with("CHANGELOG.md", "main")

    def test_get_pr_file_content_file_not_found(self, gitlab_provider, mock_project):
        """A ``GitlabGetError`` from the file lookup yields an empty string."""
        mock_project.files.get.side_effect = GitlabGetError("404 Not Found")

        content = gitlab_provider.get_pr_file_content("CHANGELOG.md", "main")

        assert content == ""
        mock_project.files.get.assert_called_once_with("CHANGELOG.md", "main")

    def test_get_pr_file_content_other_exception(self, gitlab_provider, mock_project):
        """Any other exception from the file lookup also yields an empty string."""
        mock_project.files.get.side_effect = Exception("Network error")

        content = gitlab_provider.get_pr_file_content("CHANGELOG.md", "main")

        assert content == ""

    def test_create_or_update_pr_file_create_new(self, gitlab_provider, mock_project):
        """When the file lookup raises ``GitlabGetError``, the file is created with the given payload."""
        mock_project.files.get.side_effect = GitlabGetError("404 Not Found")
        mock_file = MagicMock()
        mock_project.files.create.return_value = mock_file

        new_content = "# Changelog\n\n## v1.1.0\n- New feature"
        commit_message = "Add CHANGELOG.md"

        gitlab_provider.create_or_update_pr_file(
            "CHANGELOG.md", "feature-branch", new_content, commit_message
        )

        mock_project.files.get.assert_called_once_with("CHANGELOG.md", "feature-branch")
        mock_project.files.create.assert_called_once_with({
            'file_path': 'CHANGELOG.md',
            'branch': 'feature-branch',
            'content': new_content,
            'commit_message': commit_message,
        })

    def test_create_or_update_pr_file_update_existing(self, gitlab_provider, mock_project):
        """When the file exists, its content is replaced and it is saved on the branch."""
        mock_file = MagicMock(ProjectFile)
        mock_file.decode.return_value = "# Old changelog content"
        mock_project.files.get.return_value = mock_file

        new_content = "# New changelog content"
        commit_message = "Update CHANGELOG.md"

        gitlab_provider.create_or_update_pr_file(
            "CHANGELOG.md", "feature-branch", new_content, commit_message
        )

        mock_project.files.get.assert_called_once_with("CHANGELOG.md", "feature-branch")
        # Verify the provider wrote the new content onto the file object. This used to be a
        # plain assignment made after the call, which could never fail and checked nothing.
        assert mock_file.content == new_content
        mock_file.save.assert_called_once_with(branch="feature-branch", commit_message=commit_message)

    def test_create_or_update_pr_file_update_exception(self, gitlab_provider, mock_project):
        """An exception other than ``GitlabGetError`` during lookup propagates to the caller."""
        mock_project.files.get.side_effect = Exception("Network error")

        with pytest.raises(Exception):
            gitlab_provider.create_or_update_pr_file(
                "CHANGELOG.md", "feature-branch", "content", "message"
            )

    def test_has_create_or_update_pr_file_method(self, gitlab_provider):
        """The provider exposes a callable ``create_or_update_pr_file`` attribute."""
        assert hasattr(gitlab_provider, "create_or_update_pr_file")
        assert callable(getattr(gitlab_provider, "create_or_update_pr_file"))

    def test_method_signature_compatibility(self, gitlab_provider):
        """The bound method takes exactly ``file_path, branch, contents, message`` in that order."""
        import inspect

        sig = inspect.signature(gitlab_provider.create_or_update_pr_file)
        params = list(sig.parameters.keys())

        expected_params = ['file_path', 'branch', 'contents', 'message']
        assert params == expected_params

    @pytest.mark.parametrize("content,expected", [
        ("simple text", "simple text"),
        (b"bytes content", "bytes content"),
        ("", ""),
        (b"", ""),
        ("unicode: café", "unicode: café"),
        (b"unicode: caf\xc3\xa9", "unicode: café"),
    ])
    def test_content_encoding_handling(self, gitlab_provider, mock_project, content, expected):
        """Both ``str`` and UTF-8 ``bytes`` decode results come back as the expected ``str``."""
        mock_file = MagicMock(ProjectFile)
        mock_file.decode.return_value = content
        mock_project.files.get.return_value = mock_file

        result = gitlab_provider.get_pr_file_content("test.md", "main")

        assert result == expected

    def test_get_gitmodules_map_parsing(self, gitlab_provider, mock_project):
        """Quoted and unquoted ``path``/``url`` entries are both parsed into a path -> url mapping."""
        gitlab_provider.id_project = "1"
        gitlab_provider.mr = MagicMock()
        gitlab_provider.mr.target_branch = "main"

        file_obj = MagicMock(ProjectFile)
        file_obj.decode.return_value = (
            "[submodule \"libs/a\"]\n"
            "    path = \"libs/a\"\n"
            "    url = \"https://gitlab.com/a.git\"\n"
            "[submodule \"libs/b\"]\n"
            "    path = libs/b\n"
            "    url = git@gitlab.com:b.git\n"
        )
        mock_project.files.get.return_value = file_obj
        gitlab_provider.gl.projects.get.return_value = mock_project

        result = gitlab_provider._get_gitmodules_map()
        assert result == {
            "libs/a": "https://gitlab.com/a.git",
            "libs/b": "git@gitlab.com:b.git",
        }

    def test_project_by_path_requires_exact_match(self, gitlab_provider):
        """A listed project whose path merely ends with the requested path is not accepted."""
        gitlab_provider.gl.projects.get.reset_mock()
        gitlab_provider.gl.projects.get.side_effect = Exception("not found")
        fake = MagicMock()
        fake.path_with_namespace = "other/group/repo"
        gitlab_provider.gl.projects.list.return_value = [fake]

        result = gitlab_provider._project_by_path("group/repo")

        assert result is None
        assert gitlab_provider.gl.projects.get.call_count == 2

    def test_compare_submodule_cached(self, gitlab_provider):
        """A second identical ``_compare_submodule`` call does not repeat the project lookup or the compare."""
        proj = MagicMock()
        proj.repository_compare.return_value = {"diffs": [{"diff": "d"}]}
        with patch.object(gitlab_provider, "_project_by_path", return_value=proj) as m_pbp:
            first = gitlab_provider._compare_submodule("grp/repo", "old", "new")
            second = gitlab_provider._compare_submodule("grp/repo", "old", "new")

        assert first == second == [{"diff": "d"}]
        m_pbp.assert_called_once_with("grp/repo")
        proj.repository_compare.assert_called_once_with("old", "new")


================================================
FILE: tests/unittest/test_gitlab_webhook_port.py
================================================
import os
from unittest import mock

os.environ.setdefault("GITLAB__URL", "https://gitlab.example.com")
import pr_agent.servers.gitlab_webhook as gitlab_webhook


def test_start_uses_port_env(monkeypatch):
    """With PORT=4567 in the environment, ``start()`` calls ``uvicorn.run`` with port 4567 on host 0.0.0.0."""
    monkeypatch.setenv("PORT", "4567")

    with mock.patch.object(gitlab_webhook.uvicorn, "run") as run_stub:
        gitlab_webhook.start()

    # Index 1 of call_args holds the keyword arguments of the last call.
    run_kwargs = run_stub.call_args[1]
    assert run_kwargs["port"] == 4567
    assert run_kwargs["host"] == "0.0.0.0"


def test_start_invalid_port_env(monkeypatch):
    """A non-numeric PORT value is expected to result in port 3000 being passed to ``uvicorn.run``."""
    monkeypatch.setenv("PORT", "not-a-number")

    with mock.patch.object(gitlab_webhook.uvicorn, "run") as run_stub:
        gitlab_webhook.start()

    # Index 1 of call_args holds the keyword arguments of the last call.
    run_kwargs = run_stub.call_args[1]
    assert run_kwargs["port"] == 3000


def test_start_default_port(monkeypatch):
    """With PORT absent from the environment, port 3000 is expected to be passed to ``uvicorn.run``."""
    # raising=False: do not fail if PORT was not set in the first place.
    monkeypatch.delenv("PORT", raising=False)

    with mock.patch.object(gitlab_webhook.uvicorn, "run") as run_stub:
        gitlab_webhook.start()

    run_kwargs = run_stub.call_args[1]
    assert run_kwargs["port"] == 3000


def test_start_invalid_port_range(monkeypatch):
    """A numeric PORT of 70000 is expected to result in port 3000 being passed to ``uvicorn.run``."""
    monkeypatch.setenv("PORT", "70000")

    with mock.patch.object(gitlab_webhook.uvicorn, "run") as run_stub:
        gitlab_webhook.start()

    run_kwargs = run_stub.call_args[1]
    assert run_kwargs["port"] == 3000


================================================
FILE: tests/unittest/test_handle_patch_deletions.py
================================================
# Generated by CodiumAI
import logging

from pr_agent.algo.git_patch_processing import handle_patch_deletions
from pr_agent.config_loader import get_settings

"""
Code Analysis

Objective:
The objective of the function is to handle entire file or deletion patches and return the patch after omitting the
deletion hunks.

Inputs:
- patch: a string representing the patch to be handled
- original_file_content_str: a string representing the original content of the file
- new_file_content_str: a string representing the new content of the file
- file_name: a string representing the name of the file

Flow:
- If new_file_content_str is empty, the file is treated as deleted and None is returned instead of a patch
- Otherwise, split patch into lines and omit the deletion hunks using the omit_deletion_hunks function
- If the resulting patch is different from the original patch, log a message and set patch to the new patch
- Return the resulting patch

Outputs:
- A string representing the patch after omitting the deletion hunks

Additional aspects:
- The function uses the settings from the configuration files to determine the verbosity level of the logging messages
- The omit_deletion_hunks function is called to remove the deletion hunks from the patch
- The function handles the case where new_file_content_str is empty by returning None (the file was deleted)
"""


class TestHandlePatchDeletions:
    """Behaviour checks for ``handle_patch_deletions`` on small two-line patches of ``file.py``."""

    def test_handle_patch_deletions_happy_path_new_file_content_exists(self):
        """A patch with deletions and an addition comes back unchanged apart from trailing whitespace."""
        patch = '--- a/file.py\n+++ b/file.py\n@@ -1,2 +1,2 @@\n-foo\n-bar\n+baz\n'
        before = 'foo\nbar\n'
        after = 'foo\nbaz\n'

        result = handle_patch_deletions(patch, before, after, 'file.py')

        assert result == patch.rstrip()

    def test_handle_patch_deletions_edge_case_new_file_content_empty(self):
        """An empty new file content makes the function return None."""
        patch = '--- a/file.py\n+++ b/file.py\n@@ -1,2 +1,2 @@\n-foo\n-bar\n'
        before = 'foo\nbar\n'
        after = ''

        result = handle_patch_deletions(patch, before, after, 'file.py')

        assert result is None

    def test_handle_patch_deletions_edge_case_patch_and_patch_new_are_equal(self):
        """With identical old and new content, the result equals the input patch once both are right-stripped."""
        patch = '--- a/file.py\n+++ b/file.py\n@@ -1,2 +1,2 @@\n-foo\n-bar\n'
        before = 'foo\nbar\n'
        after = 'foo\nbar\n'

        result = handle_patch_deletions(patch, before, after, 'file.py')

        assert result.rstrip() == patch.rstrip()

    def test_handle_patch_deletions_edge_case_patch_and_patch_new_are_not_equal(self):
        """With differing content, the result is the input patch without its trailing newline."""
        patch = '--- a/file.py\n+++ b/file.py\n@@ -1,2 +1,2 @@\n-foo\n-bar\n'
        before = 'foo\nbar\n'
        after = 'foo\nbaz\n'
        expected_patch = '--- a/file.py\n+++ b/file.py\n@@ -1,2 +1,2 @@\n-foo\n-bar'

        result = handle_patch_deletions(patch, before, after, 'file.py')

        assert result == expected_patch


================================================
FILE: tests/unittest/test_ignore_repositories.py
================================================
import pytest

from pr_agent.config_loader import get_settings
from pr_agent.servers.bitbucket_app import should_process_pr_logic as bitbucket_should_process_pr_logic
from pr_agent.servers.github_app import should_process_pr_logic as github_should_process_pr_logic
from pr_agent.servers.gitlab_webhook import should_process_pr_logic as gitlab_should_process_pr_logic


def make_bitbucket_payload(full_name):
    """Build a minimal Bitbucket pull-request payload whose destination repository is ``full_name``."""
    destination = {
        "branch": {"name": "main"},
        "repository": {"full_name": full_name},
    }
    pull_request = {
        "title": "Test PR",
        "source": {"branch": {"name": "feature/test"}},
        "destination": destination,
    }
    actor = {"username": "user", "type": "user"}
    return {"data": {"pullrequest": pull_request, "actor": actor}}

def make_github_body(full_name):
    """Build a minimal GitHub webhook body for the repository ``full_name``."""
    body = {"pull_request": {}}
    body["repository"] = {"full_name": full_name}
    body["sender"] = {"login": "user"}
    return body

def make_gitlab_body(full_name):
    """Build a minimal GitLab webhook body for the project path ``full_name``."""
    attributes = {"title": "Test MR"}
    project = {"path_with_namespace": full_name}
    return {"object_attributes": attributes, "project": project}

# Parametrization table: (provider name used in assertion messages,
# that provider's should_process_pr_logic function, builder for its request body).
PROVIDERS = [
    ("github", github_should_process_pr_logic, make_github_body),
    ("bitbucket", bitbucket_should_process_pr_logic, make_bitbucket_payload),
    ("gitlab", gitlab_should_process_pr_logic, make_gitlab_body),
]

class TestIgnoreRepositories:
    """Checks how each provider's ``should_process_pr_logic`` reacts to ``CONFIG.IGNORE_REPOSITORIES``.

    Every test is parametrized over ``PROVIDERS`` so the same expectation is
    verified for each provider with a payload built by that provider's factory.

    The tests write to the settings object returned by ``get_settings()``, so the
    value found before each test is saved and put back afterwards; otherwise the
    ignore list written by one test would remain in place for tests that run later.
    """

    def setup_method(self):
        # Remember the pre-test value so teardown_method can restore it, then
        # start every test from an empty ignore list.
        self._saved_ignore_repositories = get_settings().get("CONFIG.IGNORE_REPOSITORIES", [])
        get_settings().set("CONFIG.IGNORE_REPOSITORIES", [])

    def teardown_method(self):
        # Undo whatever the test wrote so it cannot affect later tests.
        get_settings().set("CONFIG.IGNORE_REPOSITORIES", self._saved_ignore_repositories)

    @pytest.mark.parametrize("provider_name, provider_func, body_func", PROVIDERS)
    def test_should_ignore_matching_repository(self, provider_name, provider_func, body_func):
        """A repository listed in the ignore setting must be rejected (``False``)."""
        get_settings().set("CONFIG.IGNORE_REPOSITORIES", ["org/repo-to-ignore"])
        result = provider_func(body_func("org/repo-to-ignore"))
        assert result is False, f"{provider_name}: PR from ignored repository should be ignored (return False)"

    @pytest.mark.parametrize("provider_name, provider_func, body_func", PROVIDERS)
    def test_should_not_ignore_non_matching_repository(self, provider_name, provider_func, body_func):
        """A repository that is not listed in the ignore setting must be accepted (``True``)."""
        get_settings().set("CONFIG.IGNORE_REPOSITORIES", ["org/repo-to-ignore"])
        result = provider_func(body_func("org/other-repo"))
        assert result is True, f"{provider_name}: PR from non-ignored repository should not be ignored (return True)"

    @pytest.mark.parametrize("provider_name, provider_func, body_func", PROVIDERS)
    def test_should_not_ignore_when_config_empty(self, provider_name, provider_func, body_func):
        """With an empty ignore list, every repository must be accepted (``True``)."""
        get_settings().set("CONFIG.IGNORE_REPOSITORIES", [])
        result = provider_func(body_func("org/repo-to-ignore"))
        assert result is True, f"{provider_name}: PR should not be ignored if ignore_repositories config is empty"

================================================
FILE: tests/unittest/test_language_handler.py
================================================

# Generated by CodiumAI

from pr_agent.algo.language_handler import sort_files_by_main_languages

"""
Code Analysis

Objective:
The objective of the function is to sort a list of files by their main language, putting the files that are in the main
language first and the rest of the files after. It takes in a dictionary of languages and their sizes, and a list of
files.

Inputs:
- languages: a dictionary containing the languages and their sizes
- files: a list of files

Flow:
1. Sort the languages by their size in descending order
2. Get all extensions for the languages
3. Filter out files with bad extensions
4. Sort files by their extension, putting the files that are in the main extension first and the rest of the files after
5. Map languages_sorted to their respective files
6. Append the files to the files_sorted list
7. Append the rest of the files to the files_sorted list under the "Other" language category
8. Return the files_sorted list

Outputs:
- files_sorted: a list of dictionaries containing the language and its respective files

Additional aspects:
- The function uses a language_extension_map dictionary to map the languages to their respective extensions
- The function uses the filter_bad_extensions function to filter out files with bad extensions
- The function uses a rest_files dictionary to store the files that do not belong to any of the main extensions
"""


class TestSortFilesByMainLanguages:
    """Exercises ``sort_files_by_main_languages`` using minimal stand-in file objects."""

    @staticmethod
    def _stub_files(*filenames):
        """Return one throwaway object per name; each exposes only a ``filename`` attribute."""
        return [type('', (object,), {'filename': name})() for name in filenames]

    def test_happy_path_sort_files_by_main_languages(self):
        # Files are expected grouped under Python, Java and C++ (in that order),
        # followed by an empty "Other" group.
        languages = {'Python': 10, 'Java': 5, 'C++': 3}
        files = self._stub_files('file1.py', 'file2.java', 'file3.cpp', 'file4.py', 'file5.py')
        py_a, java_a, cpp_a, py_b, py_c = files
        expected_output = [
            {'language': 'Python', 'files': [py_a, py_b, py_c]},
            {'language': 'Java', 'files': [java_a]},
            {'language': 'C++', 'files': [cpp_a]},
            {'language': 'Other', 'files': []},
        ]
        assert sort_files_by_main_languages(languages, files) == expected_output

    def test_edge_case_empty_languages(self):
        # With no languages supplied, every file is expected in the "Other" group.
        languages = {}
        files = self._stub_files('file1.py', 'file2.java')
        expected_output = [{'language': 'Other', 'files': files}]
        assert sort_files_by_main_languages(languages, files) == expected_output

    def test_edge_case_empty_files(self):
        # With no files supplied, only an empty "Other" group is expected.
        languages = {'Python': 10, 'Java': 5}
        files = []
        expected_output = [{'language': 'Other', 'files': []}]
        assert sort_files_by_main_languages(languages, files) == expected_output

    def test_edge_case_languages_with_no_extensions(self):
        # The '.test' file matches none of the listed languages and is expected under "Other".
        languages = {'Python': 10, 'Java': 5, 'C++': 3}
        files = self._stub_files('file1.py', 'file2.java', 'file3.cpp', 'file3.test')
        py_file, java_file, cpp_file, unknown_file = files
        expected_output = [
            {'language': 'Python', 'files': [py_file]},
            {'language': 'Java', 'files': [java_file]},
            {'language': 'C++', 'files': [cpp_file]},
            {'language': 'Other', 'files': [unknown_file]},
        ]
        assert sort_files_by_main_languages(languages, files) == expected_output

    def test_edge_case_files_with_bad_extensions_only(self):
        # The '.csv' and '.pdf' files are expected to be absent from the result;
        # only the single '.py' file shows up, under Python.
        languages = {'Python': 10, 'Java': 5, 'C++': 3}
        files = self._stub_files('file1.csv', 'file2.pdf', 'file3.py')
        valid_py = files[2]
        expected_output = [{'language': 'Python', 'files': [valid_py]}, {'language': 'Other', 'files': []}]
        assert sort_files_by_main_languages(languages, files) == expected_output

    def test_general_behaviour_sort_files_by_main_languages(self):
        # Larger mix of files: within each language group the input order is expected to be kept.
        languages = {'Python': 10, 'Java': 5, 'C++': 3}
        files = self._stub_files(
            'file1.py', 'file2.java', 'file3.cpp',
            'file4.py', 'file5.py', 'file6.py',
            'file7.java', 'file8.cpp', 'file9.py',
        )
        python_files = [files[i] for i in (0, 3, 4, 5, 8)]
        java_files = [files[i] for i in (1, 6)]
        cpp_files = [files[i] for i in (2, 7)]
        expected_output = [
            {'language': 'Python', 'files': python_files},
            {'language': 'Java', 'files': java_files},
            {'language': 'C++', 'files': cpp_files},
            {'language': 'Other', 'files': []},
        ]
        assert sort_files_by_main_languages(languages, files) == expected_output


================================================
FILE: tests/unittest/test_litellm_reasoning_effort.py
================================================
from unittest.mock import AsyncMock, MagicMock, call, patch

import pytest

import pr_agent.algo.ai_handlers.litellm_ai_handler as litellm_handler
from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler


def create_mock_settings(reasoning_effort_value):
    """Return a stand-in settings object whose ``config.reasoning_effort`` is the given value.

    The object exposes ``config`` and ``litellm`` sections plus a top-level ``get``.
    Every ``get`` simply hands back the caller-supplied default.
    """
    def always_default(self, key, default=None):
        # No key is ever "found": the lookup result is always the fallback.
        return default

    config_section = type('', (), {
        'reasoning_effort': reasoning_effort_value,
        'ai_timeout': 120,
        'custom_reasoning_model': False,
        'max_model_tokens': 32000,
        'verbosity_level': 0,
        'get': always_default,
    })()
    litellm_section = type('', (), {'get': always_default})()

    settings_cls = type('', (), {
        'config': config_section,
        'litellm': litellm_section,
        'get': always_default,
    })
    return settings_cls()


def create_mock_acompletion_response():
    """Return a ``MagicMock`` shaped like an acompletion response.

    The mock supports ``response[key]`` and ``response.dict()``; both expose a
    single choice with content ``"test"`` and finish reason ``"stop"``.
    """
    def fresh_payload():
        # A new dict per call, so item lookups never share state with each other.
        return {"choices": [{"message": {"content": "test"}, "finish_reason": "stop"}]}

    response = MagicMock()
    response.__getitem__ = lambda self, key: fresh_payload()[key]
    response.dict.return_value = fresh_payload()
    return response


@pytest.fixture
def mock_logger():
    """Replace the handler module's ``get_logger`` for the duration of a test.

    Yields the ``MagicMock`` that the patched ``get_logger`` returns, so tests can
    inspect the ``info`` and ``warning`` calls made on it.
    """
    logger_double = MagicMock()
    with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.get_logger', return_value=logger_double):
        yield logger_double


class TestLiteLLMReasoningEffort:
    """
    Comprehensive test suite for GPT-5 reasoning_effort configuration handling.

    Tests cover:
    - Valid reasoning_effort values for GPT-5 models
    - Invalid reasoning_effort values with warning logging
    - Model detection (GPT-5 vs non-GPT-5)
    - Model suffix handling (_thinking vs regular)
    - Default fallback logic
    - Logging behavior (info and warning messages)
    - thinking_kwargs_gpt5 structure validation
    """

    # ========== Group 1: Valid Configuration Tests ==========

    @pytest.mark.asyncio
    async def test_gpt5_valid_reasoning_effort_none(self, monkeypatch, mock_logger):
        """reasoning_effort='none' in config is forwarded unchanged for a GPT-5 model."""
        settings_stub = create_mock_settings("none")
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        # Replace acompletion so the keyword arguments it receives can be inspected.
        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            await LiteLLMAIHandler().chat_completion(
                model="gpt-5-2025-08-07",
                system="test system",
                user="test user",
            )

            # The completion call happened and carried the configured effort.
            assert completion_mock.called
            sent_kwargs = completion_mock.call_args.kwargs
            assert sent_kwargs["reasoning_effort"] == "none"
            assert "reasoning_effort" in sent_kwargs["allowed_openai_params"]

            # The chosen effort is reported through the info log.
            mock_logger.info.assert_any_call("Using reasoning_effort='none' for GPT-5 model")

    @pytest.mark.asyncio
    async def test_gpt5_valid_reasoning_effort_low(self, monkeypatch, mock_logger):
        """reasoning_effort='low' in config is forwarded unchanged for a GPT-5 model."""
        settings_stub = create_mock_settings("low")
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            await LiteLLMAIHandler().chat_completion(
                model="gpt-5-2025-08-07",
                system="test system",
                user="test user",
            )

            sent_kwargs = completion_mock.call_args.kwargs
            assert sent_kwargs["reasoning_effort"] == "low"
            assert "reasoning_effort" in sent_kwargs["allowed_openai_params"]
            mock_logger.info.assert_any_call("Using reasoning_effort='low' for GPT-5 model")

    @pytest.mark.asyncio
    async def test_gpt5_valid_reasoning_effort_medium(self, monkeypatch, mock_logger):
        """reasoning_effort='medium' in config is forwarded unchanged for a GPT-5 model."""
        settings_stub = create_mock_settings("medium")
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            await LiteLLMAIHandler().chat_completion(
                model="gpt-5-2025-08-07",
                system="test system",
                user="test user",
            )

            sent_kwargs = completion_mock.call_args.kwargs
            assert sent_kwargs["reasoning_effort"] == "medium"
            assert "reasoning_effort" in sent_kwargs["allowed_openai_params"]
            mock_logger.info.assert_any_call("Using reasoning_effort='medium' for GPT-5 model")

    @pytest.mark.asyncio
    async def test_gpt5_valid_reasoning_effort_high(self, monkeypatch, mock_logger):
        """reasoning_effort='high' in config is forwarded unchanged for a GPT-5 model."""
        settings_stub = create_mock_settings("high")
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            await LiteLLMAIHandler().chat_completion(
                model="gpt-5-2025-08-07",
                system="test system",
                user="test user",
            )

            sent_kwargs = completion_mock.call_args.kwargs
            assert sent_kwargs["reasoning_effort"] == "high"
            assert "reasoning_effort" in sent_kwargs["allowed_openai_params"]
            mock_logger.info.assert_any_call("Using reasoning_effort='high' for GPT-5 model")

    @pytest.mark.asyncio
    async def test_gpt5_valid_reasoning_effort_xhigh(self, monkeypatch, mock_logger):
        """reasoning_effort='xhigh' in config is forwarded unchanged (exercised with model 'gpt-5.2')."""
        settings_stub = create_mock_settings("xhigh")
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            await LiteLLMAIHandler().chat_completion(
                model="gpt-5.2",
                system="test system",
                user="test user",
            )

            sent_kwargs = completion_mock.call_args.kwargs
            assert sent_kwargs["reasoning_effort"] == "xhigh"
            assert "reasoning_effort" in sent_kwargs["allowed_openai_params"]
            mock_logger.info.assert_any_call("Using reasoning_effort='xhigh' for GPT-5 model")

    @pytest.mark.asyncio
    async def test_gpt5_valid_reasoning_effort_minimal(self, monkeypatch, mock_logger):
        """reasoning_effort='minimal' in config is forwarded unchanged for a GPT-5 model."""
        settings_stub = create_mock_settings("minimal")
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            await LiteLLMAIHandler().chat_completion(
                model="gpt-5-2025-08-07",
                system="test system",
                user="test user",
            )

            sent_kwargs = completion_mock.call_args.kwargs
            assert sent_kwargs["reasoning_effort"] == "minimal"
            assert "reasoning_effort" in sent_kwargs["allowed_openai_params"]
            mock_logger.info.assert_any_call("Using reasoning_effort='minimal' for GPT-5 model")

    # ========== Group 2: Invalid Configuration Tests ==========

    @pytest.mark.asyncio
    async def test_gpt5_invalid_reasoning_effort_with_warning(self, monkeypatch, mock_logger):
        """An unrecognised effort ('extreme') falls back to 'medium' and triggers exactly one warning."""
        settings_stub = create_mock_settings("extreme")
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            await LiteLLMAIHandler().chat_completion(
                model="gpt-5-2025-08-07",
                system="test system",
                user="test user",
            )

            # Fallback value reaches the completion call.
            sent_kwargs = completion_mock.call_args.kwargs
            assert sent_kwargs["reasoning_effort"] == "medium"

            # One warning, naming the rejected value and listing the valid ones.
            mock_logger.warning.assert_called_once()
            warning_text = mock_logger.warning.call_args.args[0]
            assert "Invalid reasoning_effort 'extreme' in config" in warning_text
            assert "Valid values:" in warning_text

            # The effort actually used is still reported via info.
            mock_logger.info.assert_any_call("Using reasoning_effort='medium' for GPT-5 model")

    @pytest.mark.asyncio
    async def test_gpt5_invalid_reasoning_effort_thinking_model(self, monkeypatch, mock_logger):
        """A '_thinking' GPT-5 model with an invalid effort also falls back to 'medium' with one warning."""
        settings_stub = create_mock_settings("invalid_value")
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            await LiteLLMAIHandler().chat_completion(
                model="gpt-5-2025-08-07_thinking",
                system="test system",
                user="test user",
            )

            # Same 'medium' fallback as the non-suffixed model name.
            sent_kwargs = completion_mock.call_args.kwargs
            assert sent_kwargs["reasoning_effort"] == "medium"

            # The invalid value is reported exactly once.
            mock_logger.warning.assert_called_once()

            # The effort actually used is reported via info.
            mock_logger.info.assert_any_call("Using reasoning_effort='medium' for GPT-5 model")

    @pytest.mark.asyncio
    async def test_gpt5_none_config_defaults_to_medium(self, monkeypatch, mock_logger):
        """With reasoning_effort unset (None), a GPT-5 model uses 'medium' and no warning is logged."""
        settings_stub = create_mock_settings(None)
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            await LiteLLMAIHandler().chat_completion(
                model="gpt-5-2025-08-07",
                system="test system",
                user="test user",
            )

            # Default effort reaches the completion call.
            sent_kwargs = completion_mock.call_args.kwargs
            assert sent_kwargs["reasoning_effort"] == "medium"

            # An unset value is not treated as a misconfiguration.
            mock_logger.warning.assert_not_called()

            # The default is still announced via info.
            mock_logger.info.assert_any_call("Using reasoning_effort='medium' for GPT-5 model")

    @pytest.mark.asyncio
    async def test_gpt5_none_config_thinking_model_defaults_to_medium(self, monkeypatch, mock_logger):
        """With reasoning_effort unset (None), a '_thinking' GPT-5 model uses 'medium' and logs no warning."""
        settings_stub = create_mock_settings(None)
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            await LiteLLMAIHandler().chat_completion(
                model="gpt-5-2025-08-07_thinking",
                system="test system",
                user="test user",
            )

            # Same 'medium' default as the non-suffixed model name.
            sent_kwargs = completion_mock.call_args.kwargs
            assert sent_kwargs["reasoning_effort"] == "medium"

            # An unset value is not treated as a misconfiguration.
            mock_logger.warning.assert_not_called()

            # The default is announced via info.
            mock_logger.info.assert_any_call("Using reasoning_effort='medium' for GPT-5 model")

    # ========== Group 3: Model Detection Tests ==========

    @pytest.mark.asyncio
    async def test_gpt5_model_detection_various_versions(self, monkeypatch, mock_logger):
        """Several GPT-5 model name variants must all receive the configured reasoning_effort."""
        settings_stub = create_mock_settings("medium")
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        model_names = (
            "gpt-5-2025-08-07",
            "gpt-5.1",
            "gpt-5.4",
            "gpt-5.4-2026-03-05",
            "gpt-5-turbo",
            "gpt-5.1-codex",
            "gpt-5.3-codex",
        )

        for model_name in model_names:
            # A fresh acompletion mock per model keeps each iteration's call_args separate.
            with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as completion_mock:
                completion_mock.return_value = create_mock_acompletion_response()

                await LiteLLMAIHandler().chat_completion(
                    model=model_name,
                    system="test system",
                    user="test user",
                )

                # Each variant must carry the effort and whitelist the parameter.
                sent_kwargs = completion_mock.call_args.kwargs
                assert sent_kwargs["reasoning_effort"] == "medium"
                assert "reasoning_effort" in sent_kwargs["allowed_openai_params"]

    @pytest.mark.asyncio
    async def test_non_gpt5_model_no_thinking_kwargs(self, monkeypatch, mock_logger):
        """Models that are not GPT-5 must not receive a reasoning_effort argument."""
        settings_stub = create_mock_settings("high")
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        for model_name in ("gpt-4o", "gpt-4-turbo", "claude-3-5-sonnet"):
            with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as completion_mock:
                completion_mock.return_value = create_mock_acompletion_response()

                await LiteLLMAIHandler().chat_completion(
                    model=model_name,
                    system="test system",
                    user="test user",
                )

                # Even with 'high' configured, the argument is absent for these models.
                sent_kwargs = completion_mock.call_args.kwargs
                assert "reasoning_effort" not in sent_kwargs

    @pytest.mark.asyncio
    async def test_gpt5_suffix_removal(self, monkeypatch, mock_logger):
        """The '_thinking' suffix must not appear in the model name passed to acompletion."""
        settings_stub = create_mock_settings("low")
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            await LiteLLMAIHandler().chat_completion(
                model="gpt-5_thinking",
                system="test system",
                user="test user",
            )

            # 'gpt-5_thinking' is expected to be sent as 'openai/gpt-5'.
            sent_kwargs = completion_mock.call_args.kwargs
            assert sent_kwargs["model"] == "openai/gpt-5"

    # ========== Group 4: Model Suffix Handling Tests ==========

    @pytest.mark.asyncio
    async def test_gpt5_thinking_suffix_default_medium(self, monkeypatch, mock_logger):
        """A '_thinking' model name with no configured effort gets 'medium'."""
        settings_stub = create_mock_settings(None)
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            await LiteLLMAIHandler().chat_completion(
                model="gpt-5-2025-08-07_thinking",
                system="test system",
                user="test user",
            )

            sent_kwargs = completion_mock.call_args.kwargs
            assert sent_kwargs["reasoning_effort"] == "medium"
            mock_logger.info.assert_any_call("Using reasoning_effort='medium' for GPT-5 model")

    @pytest.mark.asyncio
    async def test_gpt5_regular_suffix_default_medium(self, monkeypatch, mock_logger):
        """A GPT-5 model name without the '_thinking' suffix and no configured effort gets 'medium'."""
        settings_stub = create_mock_settings(None)
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            await LiteLLMAIHandler().chat_completion(
                model="gpt-5-2025-08-07",
                system="test system",
                user="test user",
            )

            sent_kwargs = completion_mock.call_args.kwargs
            assert sent_kwargs["reasoning_effort"] == "medium"
            mock_logger.info.assert_any_call("Using reasoning_effort='medium' for GPT-5 model")

    @pytest.mark.asyncio
    async def test_gpt5_thinking_suffix_config_overrides_default(self, monkeypatch, mock_logger):
        """A configured effort ('high') wins over the 'medium' default for a '_thinking' model."""
        settings_stub = create_mock_settings("high")
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            await LiteLLMAIHandler().chat_completion(
                model="gpt-5-2025-08-07_thinking",
                system="test system",
                user="test user",
            )

            # The configured value, not the default, must be sent and logged.
            sent_kwargs = completion_mock.call_args.kwargs
            assert sent_kwargs["reasoning_effort"] == "high"
            mock_logger.info.assert_any_call("Using reasoning_effort='high' for GPT-5 model")

    # ========== Group 5: Logging Behavior Tests ==========

    @pytest.mark.asyncio
    async def test_gpt5_info_logging_configured_value(self, monkeypatch, mock_logger):
        """The info log names the configured effort ('low') for a GPT-5 model."""
        settings_stub = create_mock_settings("low")
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            await LiteLLMAIHandler().chat_completion(
                model="gpt-5-2025-08-07",
                system="test system",
                user="test user",
            )

            # Only the log line is under test here.
            mock_logger.info.assert_any_call("Using reasoning_effort='low' for GPT-5 model")

    @pytest.mark.asyncio
    async def test_gpt5_info_logging_default_value(self, monkeypatch, mock_logger):
        """The info log names the default effort ('medium') when none is configured."""
        settings_stub = create_mock_settings(None)
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            await LiteLLMAIHandler().chat_completion(
                model="gpt-5-2025-08-07",
                system="test system",
                user="test user",
            )

            # Only the log line is under test here.
            mock_logger.info.assert_any_call("Using reasoning_effort='medium' for GPT-5 model")

    @pytest.mark.asyncio
    async def test_gpt5_warning_only_for_invalid_non_none(self, monkeypatch, mock_logger):
        """None must not produce a warning; an invalid string ('ultra') must produce exactly one."""
        # Phase 1: effort left unset.
        unset_settings = create_mock_settings(None)
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: unset_settings)

        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            await LiteLLMAIHandler().chat_completion(
                model="gpt-5-2025-08-07",
                system="test system",
                user="test user",
            )

            # Nothing to complain about when the value is simply absent.
            mock_logger.warning.assert_not_called()

        # Clear recorded calls so phase 2 counts only its own warnings.
        mock_logger.reset_mock()

        # Phase 2: effort set to a string that is not an accepted value.
        invalid_settings = create_mock_settings("ultra")
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: invalid_settings)

        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            await LiteLLMAIHandler().chat_completion(
                model="gpt-5-2025-08-07",
                system="test system",
                user="test user",
            )

            # The invalid value is reported exactly once.
            mock_logger.warning.assert_called_once()

    # ========== Group 6: Structure Validation Tests ==========

    @pytest.mark.asyncio
    async def test_thinking_kwargs_gpt5_structure(self, monkeypatch, mock_logger):
        """For GPT-5 the call carries 'reasoning_effort' plus a list 'allowed_openai_params' naming it."""
        settings_stub = create_mock_settings("medium")
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        with patch('pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion', new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            await LiteLLMAIHandler().chat_completion(
                model="gpt-5-2025-08-07",
                system="test system",
                user="test user",
            )

            sent_kwargs = completion_mock.call_args.kwargs

            # Both keys are present, with the expected value and container type.
            assert "reasoning_effort" in sent_kwargs
            assert sent_kwargs["reasoning_effort"] == "medium"
            assert "allowed_openai_params" in sent_kwargs
            allowed_params = sent_kwargs["allowed_openai_params"]
            assert isinstance(allowed_params, list)
            assert "reasoning_effort" in allowed_params

    @pytest.mark.asyncio
    async def test_thinking_kwargs_not_created_for_non_gpt5(self, monkeypatch, mock_logger):
        """A non-GPT-5 model must not receive reasoning_effort, directly or via allowed_openai_params."""
        settings_stub = create_mock_settings("high")
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        acompletion_target = 'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion'
        with patch(acompletion_target, new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            ai_handler = LiteLLMAIHandler()
            await ai_handler.chat_completion(
                model="gpt-4o",
                system="test system",
                user="test user",
            )

            # call_args is an (args, kwargs) pair; only the keyword part matters here.
            _, sent_kwargs = completion_mock.call_args

            assert "reasoning_effort" not in sent_kwargs
            # allowed_openai_params may be absent or None; when present it must not list the key.
            allowed_params = sent_kwargs.get("allowed_openai_params")
            if allowed_params is not None:
                assert "reasoning_effort" not in allowed_params

    # ========== Group 7: Edge Cases ==========

    @pytest.mark.asyncio
    async def test_empty_string_reasoning_effort(self, monkeypatch, mock_logger):
        """An empty-string reasoning_effort is rejected: 'medium' is sent and one warning is logged."""
        settings_stub = create_mock_settings("")
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        acompletion_target = 'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion'
        with patch(acompletion_target, new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            ai_handler = LiteLLMAIHandler()
            await ai_handler.chat_completion(
                model="gpt-5-2025-08-07",
                system="test system",
                user="test user",
            )

            # Fallback value is used and the invalid setting is reported exactly once.
            _, sent_kwargs = completion_mock.call_args
            assert sent_kwargs["reasoning_effort"] == "medium"
            mock_logger.warning.assert_called_once()

    @pytest.mark.asyncio
    async def test_case_sensitive_reasoning_effort(self, monkeypatch, mock_logger):
        """An upper-case value ("LOW") is not accepted: 'medium' is sent and one warning is logged."""
        settings_stub = create_mock_settings("LOW")
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        acompletion_target = 'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion'
        with patch(acompletion_target, new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            ai_handler = LiteLLMAIHandler()
            await ai_handler.chat_completion(
                model="gpt-5-2025-08-07",
                system="test system",
                user="test user",
            )

            # Upper-case input is treated as invalid, so the fallback applies.
            _, sent_kwargs = completion_mock.call_args
            assert sent_kwargs["reasoning_effort"] == "medium"
            mock_logger.warning.assert_called_once()

    @pytest.mark.asyncio
    async def test_whitespace_reasoning_effort(self, monkeypatch, mock_logger):
        """A value padded with spaces (" low ") is not accepted: 'medium' is sent and one warning is logged."""
        settings_stub = create_mock_settings(" low ")
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        acompletion_target = 'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion'
        with patch(acompletion_target, new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            ai_handler = LiteLLMAIHandler()
            await ai_handler.chat_completion(
                model="gpt-5-2025-08-07",
                system="test system",
                user="test user",
            )

            # The padded value is treated as invalid, so the fallback applies.
            _, sent_kwargs = completion_mock.call_args
            assert sent_kwargs["reasoning_effort"] == "medium"
            mock_logger.warning.assert_called_once()

    @pytest.mark.asyncio
    async def test_gpt5_prefix_match_only(self, monkeypatch, mock_logger):
        """Document how model names are matched against the 'gpt-5' prefix.

        The handler's check is startswith('gpt-5'), so a name such as 'gpt-50'
        matches as well ('gpt-50'.startswith('gpt-5') is True). This test pins
        that current behavior rather than asserting it is desirable.
        """
        settings_stub = create_mock_settings("medium")
        monkeypatch.setattr(litellm_handler, "get_settings", lambda: settings_stub)

        acompletion_target = 'pr_agent.algo.ai_handlers.litellm_ai_handler.acompletion'

        # Phase 1: 'gpt-50' is caught by the prefix check.
        with patch(acompletion_target, new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            ai_handler = LiteLLMAIHandler()
            await ai_handler.chat_completion(
                model="gpt-50",
                system="test system",
                user="test user",
            )

            # The prefix match means reasoning_effort is attached for gpt-50 too.
            _, sent_kwargs = completion_mock.call_args
            assert "reasoning_effort" in sent_kwargs

        # Clear recorded logger calls before the second phase.
        mock_logger.reset_mock()

        # Phase 2: plain 'gpt-5' matches and carries the configured effort.
        with patch(acompletion_target, new_callable=AsyncMock) as completion_mock:
            completion_mock.return_value = create_mock_acompletion_response()

            ai_handler = LiteLLMAIHandler()
            await ai_handler.chat_completion(
                model="gpt-5",
                system="test system",
                user="test user",
            )

            _, sent_kwargs = completion_mock.call_args
            assert sent_kwargs["reasoning_effort"] == "medium"


================================================
FILE: tests/unittest/test_load_yaml.py
================================================

# Generated by CodiumAI

import pytest
import yaml
from yaml.scanner import ScannerError

from pr_agent.algo.utils import load_yaml


class TestLoadYaml:
    """Checks load_yaml on well-formed input and on input that yaml.safe_load rejects."""

    def test_load_valid_yaml(self):
        """A simple two-key mapping is parsed into the matching dict."""
        parsed = load_yaml('name: John Smith\nage: 35')
        assert parsed == {'name': 'John Smith', 'age': 35}

    def test_load_invalid_yaml1(self):
        """A review whose 'relevant line' value breaks plain YAML is still loaded by load_yaml."""
        broken_review = '''\
PR Analysis:
  Main theme: Enhancing the `/describe` command prompt by adding title and description
  Type of PR: Enhancement
  Relevant tests: No
  Focused PR: Yes, the PR is focused on enhancing the `/describe` command prompt.

PR Feedback:
  General suggestions: The PR seems to be well-structured and focused on a specific enhancement. However, it would be beneficial to add tests to ensure the new feature works as expected.
  Code feedback:
    - relevant file: pr_agent/settings/pr_description_prompts.toml
      suggestion: Consider using a more descriptive variable name than 'user' for the command prompt. A more descriptive name would make the code more readable and maintainable. [medium]
      relevant line: user="""PR Info: aaa
  Security concerns: No'''
        # Sanity check: the stock parser really does reject this text.
        with pytest.raises(ScannerError):
            yaml.safe_load(broken_review)

        recovered = {
            'PR Analysis': {
                'Main theme': 'Enhancing the `/describe` command prompt by adding title and description',
                'Type of PR': 'Enhancement',
                'Relevant tests': False,
                'Focused PR': 'Yes, the PR is focused on enhancing the `/describe` command prompt.',
            },
            'PR Feedback': {
                'General suggestions': 'The PR seems to be well-structured and focused on a specific enhancement. However, it would be beneficial to add tests to ensure the new feature works as expected.',  # noqa: E501
                'Code feedback': [
                    {
                        'relevant file': 'pr_agent/settings/pr_description_prompts.toml\n',
                        'suggestion': "Consider using a more descriptive variable name than 'user' for the command prompt. A more descriptive name would make the code more readable and maintainable. [medium]",  # noqa: E501
                        'relevant line': 'user="""PR Info: aaa\n',
                    },
                ],
                'Security concerns': False,
            },
        }
        assert load_yaml(broken_review) == recovered

    def test_load_invalid_yaml2(self):
        """Values ending in a colon break plain YAML but are still loaded by load_yaml."""
        # The trailing backslash joins the last content line to the closing quotes,
        # so the text ends with "==: " and has no final newline.
        broken_suggestion = '''\
- relevant file: src/app.py:
  suggestion content: The print statement is outside inside the if __name__ ==: \
'''
        # Sanity check: the stock parser really does reject this text.
        with pytest.raises(ScannerError):
            yaml.safe_load(broken_suggestion)

        recovered = [
            {
                'relevant file': 'src/app.py:\n',
                'suggestion content': 'The print statement is outside inside the if __name__ ==:',
            },
        ]
        assert load_yaml(broken_suggestion) == recovered


================================================
FILE: tests/unittest/test_parse_code_suggestion.py
================================================

# Generated by CodiumAI
from pr_agent.algo.utils import parse_code_suggestion

"""
Code Analysis

Objective:
The objective of the function is to convert a dictionary into a markdown format. The function takes in a dictionary as
input and recursively converts it into a markdown format. The function is specifically designed to handle dictionaries
that contain code suggestions.

Inputs:
- output_data: a dictionary containing the data to be converted into markdown format

Flow:
- Initialize an empty string variable called markdown_text
- Create a dictionary of emojis to be used in the markdown format
- Iterate through the items in the input dictionary
- If the value is empty, skip to the next item
- If the value is a dictionary, recursively call the function with the value as input
- If the value is a list, iterate through the list and add each item to the markdown format
- If the value is not 'n/a', add it to the markdown format
- If the key is 'code suggestions', call the parse_code_suggestion function to handle the list of code suggestions
- Return the markdown format as a string

Outputs:
- markdown_text: a string containing the input dictionary converted into markdown format

Additional aspects:
- The function uses the textwrap module to indent code examples in the markdown format
- The parse_code_suggestion function is called to handle the 'code suggestions' key in the input dictionary
- The function uses emojis to add visual cues to the markdown format
"""


class TestParseCodeSuggestion:
    """Pins the exact markdown text produced by parse_code_suggestion for a few input shapes."""

    def test_empty_dict(self):
        """An empty dictionary yields a lone newline."""
        rendered = parse_code_suggestion({})
        assert rendered == "\n"

    def test_non_string_before_or_after(self):
        """Non-string 'Before'/'After' values are rendered inside fenced blocks via their text form."""
        suggestion = {
            "Code example": {
                "Before": 123,
                "After": ["a", "b", "c"],
            },
        }
        rendered = parse_code_suggestion(suggestion)
        assert rendered == "  - **Code example:**\n    - **Before:**\n        ```\n        123\n        ```\n    - **After:**\n        ```\n        ['a', 'b', 'c']\n        ```\n\n"  # noqa: E501

    def test_no_code_example_key(self):
        """Without a 'code example' entry every key is rendered as a flat bold line."""
        suggestion = {
            'suggestion': 'Suggestion 1',
            'description': 'Description 1',
            'before': 'Before 1',
            'after': 'After 1',
        }
        rendered = parse_code_suggestion(suggestion)
        assert rendered == '   **suggestion:** Suggestion 1     \n   **description:** Description 1     \n   **before:** Before 1     \n   **after:** After 1     \n\n'  # noqa: E501

    def test_with_code_example_key(self):
        """A nested 'code example' dict is rendered as a sub-list with fenced before/after blocks."""
        suggestion = {
            'suggestion': 'Suggestion 2',
            'description': 'Description 2',
            'code example': {
                'before': 'Before 2',
                'after': 'After 2',
            },
        }
        rendered = parse_code_suggestion(suggestion)
        assert rendered == '   **suggestion:** Suggestion 2     \n   **description:** Description 2     \n  - **code example:**\n    - **before:**\n        ```\n        Before 2\n        ```\n    - **after:**\n        ```\n        After 2\n        ```\n\n'  # noqa: E501


================================================
FILE: tests/unittest/test_pr_update_changelog.py
================================================
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from pr_agent.tools.pr_update_changelog import PRUpdateChangelog


class TestPRUpdateChangelog:
    """Test suite for the PR Update Changelog functionality."""

    @pytest.fixture
    def mock_git_provider(self):
        """Create a mock git provider with canned PR metadata and an empty changelog file."""
        provider = MagicMock()
        provider.get_pr_branch.return_value = "feature-branch"
        provider.get_pr_file_content.return_value = ""
        provider.pr.title = "Test PR"
        provider.get_pr_description.return_value = "Test description"
        provider.get_commit_messages.return_value = "fix: test commit"
        provider.get_languages.return_value = {"Python": 80, "JavaScript": 20}
        provider.get_files.return_value = ["test.py", "test.js"]
        return provider

    @pytest.fixture
    def mock_ai_handler(self):
        """Create a mock AI handler whose chat_completion returns a fixed (text, finish_reason) pair."""
        handler = MagicMock()
        handler.chat_completion = AsyncMock(return_value=("Test changelog entry", "stop"))
        return handler

    @pytest.fixture
    def changelog_tool(self, mock_git_provider, mock_ai_handler):
        """Create a PRUpdateChangelog instance with mocked dependencies.

        NOTE: the tool is returned from inside the ``with`` block, so the three
        patches are only active while the tool is being constructed; they are
        undone before the test body runs. Tests that need patched settings at
        call time install their own patches.
        """
        with patch('pr_agent.tools.pr_update_changelog.get_git_provider', return_value=lambda url: mock_git_provider), \
             patch('pr_agent.tools.pr_update_changelog.get_main_pr_language', return_value="Python"), \
             patch('pr_agent.tools.pr_update_changelog.get_settings') as mock_settings:

            # Configure mock settings
            mock_settings.return_value.pr_update_changelog.push_changelog_changes = False
            mock_settings.return_value.pr_update_changelog.extra_instructions = ""
            mock_settings.return_value.pr_update_changelog_prompt.system = "System prompt"
            mock_settings.return_value.pr_update_changelog_prompt.user = "User prompt"
            mock_settings.return_value.config.temperature = 0.2

            tool = PRUpdateChangelog("https://gitlab.com/test/repo/-/merge_requests/1", ai_handler=lambda: mock_ai_handler)
            return tool

    def test_get_changelog_file_with_existing_content(self, changelog_tool, mock_git_provider):
        """Test retrieving existing changelog content."""
        # Arrange
        existing_content = "# Changelog\n\n## v1.0.0\n- Initial release\n- Bug fixes"
        mock_git_provider.get_pr_file_content.return_value = existing_content

        # Act
        changelog_tool._get_changelog_file()

        # Assert
        assert changelog_tool.changelog_file == existing_content
        assert "# Changelog" in changelog_tool.changelog_file_str

    def test_get_changelog_file_with_no_existing_content(self, changelog_tool, mock_git_provider):
        """Test handling when no changelog file exists."""
        # Arrange
        mock_git_provider.get_pr_file_content.return_value = ""

        # Act
        changelog_tool._get_changelog_file()

        # Assert
        assert changelog_tool.changelog_file == ""
        assert "Example:" in changelog_tool.changelog_file_str  # Default template

    def test_get_changelog_file_with_bytes_content(self, changelog_tool, mock_git_provider):
        """Test handling when git provider returns bytes instead of string."""
        # Arrange
        content_bytes = b"# Changelog\n\n## v1.0.0\n- Initial release"
        mock_git_provider.get_pr_file_content.return_value = content_bytes

        # Act
        changelog_tool._get_changelog_file()

        # Assert
        assert isinstance(changelog_tool.changelog_file, str)
        assert changelog_tool.changelog_file == "# Changelog\n\n## v1.0.0\n- Initial release"

    def test_get_changelog_file_with_exception(self, changelog_tool, mock_git_provider):
        """Test handling exceptions during file retrieval."""
        # Arrange
        mock_git_provider.get_pr_file_content.side_effect = Exception("Network error")

        # Act
        changelog_tool._get_changelog_file()

        # Assert
        assert changelog_tool.changelog_file == ""
        assert changelog_tool.changelog_file_str == ""  # Exception should result in empty string, no default template

    def test_prepare_changelog_update_with_existing_content(self, changelog_tool):
        """Test preparing changelog update when existing content exists."""
        # Arrange
        changelog_tool.prediction = "## v1.1.0\n- New feature\n- Bug fix"
        changelog_tool.changelog_file = "# Changelog\n\n## v1.0.0\n- Initial release"
        changelog_tool.commit_changelog = True

        # Act
        new_content, answer = changelog_tool._prepare_changelog_update()

        # Assert
        assert new_content.startswith("## v1.1.0\n- New feature\n- Bug fix\n\n")
        assert "# Changelog\n\n## v1.0.0\n- Initial release" in new_content
        assert answer == "## v1.1.0\n- New feature\n- Bug fix"

    def test_prepare_changelog_update_without_existing_content(self, changelog_tool):
        """Test preparing changelog update when no existing content."""
        # Arrange
        changelog_tool.prediction = "## v1.0.0\n- Initial release"
        changelog_tool.changelog_file = ""
        changelog_tool.commit_changelog = True

        # Act
        new_content, answer = changelog_tool._prepare_changelog_update()

        # Assert
        assert new_content == "## v1.0.0\n- Initial release"
        assert answer == "## v1.0.0\n- Initial release"

    def test_prepare_changelog_update_no_commit(self, changelog_tool):
        """Test preparing changelog update when not committing."""
        # Arrange
        changelog_tool.prediction = "## v1.1.0\n- New feature"
        changelog_tool.changelog_file = ""
        changelog_tool.commit_changelog = False

        # Act
        new_content, answer = changelog_tool._prepare_changelog_update()

        # Assert
        assert new_content == "## v1.1.0\n- New feature"
        assert "to commit the new content" in answer

    @pytest.mark.asyncio
    async def test_run_without_push_support(self, changelog_tool, mock_git_provider):
        """Test running changelog update when git provider doesn't support pushing."""
        # Arrange
        # Deleting the attribute makes hasattr() on the MagicMock report it as missing.
        delattr(mock_git_provider, 'create_or_update_pr_file')  # Remove the method
        changelog_tool.commit_changelog = True

        with patch('pr_agent.tools.pr_update_changelog.get_settings') as mock_settings:
            mock_settings.return_value.pr_update_changelog.push_changelog_changes = True
            mock_settings.return_value.config.publish_output = True

            # Act
            await changelog_tool.run()

            # Assert
            mock_git_provider.publish_comment.assert_called_once()
            assert "not currently supported" in str(mock_git_provider.publish_comment.call_args)

    @pytest.mark.asyncio
    async def test_run_with_push_support(self, changelog_tool, mock_git_provider):
        """Test running changelog update when git provider supports pushing."""
        # Arrange
        mock_git_provider.create_or_update_pr_file = MagicMock()
        changelog_tool.commit_changelog = True
        changelog_tool.prediction = "## v1.1.0\n- New feature"

        with patch('pr_agent.tools.pr_update_changelog.get_settings') as mock_settings, \
             patch('pr_agent.tools.pr_update_changelog.retry_with_fallback_models') as mock_retry, \
             patch('pr_agent.tools.pr_update_changelog.sleep'):

            mock_settings.return_value.pr_update_changelog.push_changelog_changes = True
            mock_settings.return_value.pr_update_changelog.get.return_value = True
            mock_settings.return_value.config.publish_output = True
            mock_settings.return_value.config.git_provider = "gitlab"
            mock_retry.return_value = None

            # Act
            await changelog_tool.run()

            # Assert
            mock_git_provider.create_or_update_pr_file.assert_called_once()
            call_args = mock_git_provider.create_or_update_pr_file.call_args
            assert call_args[1]['file_path'] == 'CHANGELOG.md'
            assert call_args[1]['branch'] == 'feature-branch'

    def test_push_changelog_update(self, changelog_tool, mock_git_provider):
        """Test the push changelog update functionality."""
        # Arrange
        mock_git_provider.create_or_update_pr_file = MagicMock()
        mock_git_provider.get_pr_branch.return_value = "feature-branch"
        new_content = "# Updated changelog content"
        answer = "Changes made"

        with patch('pr_agent.tools.pr_update_changelog.get_settings') as mock_settings, \
             patch('pr_agent.tools.pr_update_changelog.sleep'):

            mock_settings.return_value.pr_update_changelog.get.return_value = True

            # Act
            changelog_tool._push_changelog_update(new_content, answer)

            # Assert
            mock_git_provider.create_or_update_pr_file.assert_called_once_with(
                file_path="CHANGELOG.md",
                branch="feature-branch",
                contents=new_content,
                message="[skip ci] Update CHANGELOG.md"
            )

    def test_gitlab_provider_method_detection(self, changelog_tool, mock_git_provider):
        """Test that the tool correctly detects GitLab provider method availability."""
        # Arrange
        mock_git_provider.create_or_update_pr_file = MagicMock()

        # Act & Assert
        # NOTE(review): this only checks the attribute that was just assigned on the
        # mock; it does not exercise any code path of the tool itself.
        assert hasattr(mock_git_provider, "create_or_update_pr_file")

    @pytest.mark.parametrize("existing_content,new_entry,expected_order", [
        (
            "# Changelog\n\n## v1.0.0\n- Old feature",
            "## v1.1.0\n- New feature",
            ["v1.1.0", "v1.0.0"]
        ),
        (
            "",
            "## v1.0.0\n- Initial release",
            ["v1.0.0"]
        ),
        (
            "Some existing content",
            "## v1.0.0\n- New entry",
            ["v1.0.0", "Some existing content"]
        ),
    ])
    def test_changelog_order_preservation(self, changelog_tool, existing_content, new_entry, expected_order):
        """Test that changelog entries are properly ordered (newest first).

        Every marker in ``expected_order`` must appear in the generated content,
        and the markers must appear in the listed order.
        """
        # Arrange
        changelog_tool.prediction = new_entry
        changelog_tool.changelog_file = existing_content
        changelog_tool.commit_changelog = True

        # Act
        new_content, _ = changelog_tool._prepare_changelog_update()

        # Assert
        positions = [new_content.find(marker) for marker in expected_order]

        # str.find returns -1 for a missing marker, which would let an ordering
        # comparison pass vacuously, so presence is verified first. This also gives
        # the single-marker case a real assertion.
        for marker, position in zip(expected_order, positions):
            assert position != -1, f"Expected {marker} to be present in the changelog"

        for i in range(len(expected_order) - 1):
            assert positions[i] < positions[i + 1], \
                f"Expected {expected_order[i]} to come before {expected_order[i + 1]}"

================================================
FILE: tests/unittest/test_secret_provider_factory.py
================================================
from unittest.mock import MagicMock, patch

import pytest

from pr_agent.secret_providers import get_secret_provider


class TestSecretProviderFactory:
    """Exercises get_secret_provider for each configured provider name and its failure modes."""

    @staticmethod
    def _settings_stub(provider_name):
        """Build a settings double whose get() returns ``provider_name``.

        config.secret_provider is only set explicitly for a real name; with
        None it is left as the MagicMock auto-attribute.
        """
        stub = MagicMock()
        stub.get.return_value = provider_name
        if provider_name is not None:
            stub.config.secret_provider = provider_name
        return stub

    def test_get_secret_provider_none_when_not_configured(self):
        """No configured provider name results in None."""
        with patch('pr_agent.secret_providers.get_settings') as get_settings_mock:
            get_settings_mock.return_value = self._settings_stub(None)

            assert get_secret_provider() is None

    def test_get_secret_provider_google_cloud_storage(self):
        """'google_cloud_storage' instantiates GoogleCloudStorageSecretProvider exactly once."""
        provider_target = 'pr_agent.secret_providers.google_cloud_storage_secret_provider.GoogleCloudStorageSecretProvider'
        with patch('pr_agent.secret_providers.get_settings') as get_settings_mock:
            get_settings_mock.return_value = self._settings_stub("google_cloud_storage")

            with patch(provider_target) as provider_cls:
                built_provider = MagicMock()
                provider_cls.return_value = built_provider

                assert get_secret_provider() is built_provider
                provider_cls.assert_called_once()

    def test_get_secret_provider_aws_secrets_manager(self):
        """'aws_secrets_manager' instantiates AWSSecretsManagerProvider exactly once."""
        provider_target = 'pr_agent.secret_providers.aws_secrets_manager_provider.AWSSecretsManagerProvider'
        with patch('pr_agent.secret_providers.get_settings') as get_settings_mock:
            get_settings_mock.return_value = self._settings_stub("aws_secrets_manager")

            with patch(provider_target) as provider_cls:
                built_provider = MagicMock()
                provider_cls.return_value = built_provider

                assert get_secret_provider() is built_provider
                provider_cls.assert_called_once()

    def test_get_secret_provider_unknown_provider(self):
        """An unrecognised provider name raises ValueError."""
        with patch('pr_agent.secret_providers.get_settings') as get_settings_mock:
            get_settings_mock.return_value = self._settings_stub("unknown_provider")

            with pytest.raises(ValueError, match="Unknown SECRET_PROVIDER"):
                get_secret_provider()

    def test_get_secret_provider_initialization_error(self):
        """A provider constructor failure is surfaced as ValueError naming the provider."""
        provider_target = 'pr_agent.secret_providers.aws_secrets_manager_provider.AWSSecretsManagerProvider'
        with patch('pr_agent.secret_providers.get_settings') as get_settings_mock:
            get_settings_mock.return_value = self._settings_stub("aws_secrets_manager")

            with patch(provider_target) as provider_cls:
                # Make instantiation itself blow up.
                provider_cls.side_effect = Exception("Initialization failed")

                with pytest.raises(ValueError, match="Failed to initialize aws_secrets_manager secret provider"):
                    get_secret_provider()


================================================
FILE: tests/unittest/test_similar_issue_non_github.py
================================================
import pytest

from pr_agent.tools.pr_similar_issue import PRSimilarIssue


@pytest.mark.asyncio
async def test_similar_issue_non_github_publishes_message(monkeypatch):
    """With a gitlab provider and publishing enabled, run() posts the GitHub-only notice and returns ""."""

    class FakeSettings:
        class config:
            git_provider = "gitlab"
            publish_output = True

    class FakeProvider:
        """Provider double that records every published comment body."""

        def __init__(self):
            self.comments = []

        def publish_comment(self, body):
            self.comments.append(body)

    recorder = FakeProvider()

    monkeypatch.setattr("pr_agent.tools.pr_similar_issue.get_settings", lambda: FakeSettings)
    monkeypatch.setattr(
        "pr_agent.git_providers.get_git_provider_with_context",
        lambda _: recorder,
    )

    similar_issue_tool = PRSimilarIssue("https://gitlab.example.com/group/repo/-/merge_requests/1", None)
    outcome = await similar_issue_tool.run()

    assert outcome == ""
    # Exactly one comment, carrying the unsupported-provider notice.
    expected_comments = ["The /similar_issue tool is currently supported only for GitHub."]
    assert recorder.comments == expected_comments


@pytest.mark.asyncio
async def test_similar_issue_non_github_no_publish(monkeypatch):
    """With a gitlab provider and publishing disabled, run() returns ""."""

    class FakeSettings:
        class config:
            git_provider = "gitlab"
            publish_output = False

    monkeypatch.setattr("pr_agent.tools.pr_similar_issue.get_settings", lambda: FakeSettings)

    similar_issue_tool = PRSimilarIssue("https://gitlab.example.com/group/repo/-/merge_requests/1", None)
    outcome = await similar_issue_tool.run()

    assert outcome == ""


================================================
FILE: tests/unittest/test_try_fix_yaml.py
================================================
# Generated by CodiumAI

from pr_agent.algo.utils import try_fix_yaml


class TestTryFixYaml:

    # The function successfully parses a valid YAML string.
    def test_valid_yaml(self):
        """A well-formed single-key YAML document comes back as the matching dict."""
        parsed = try_fix_yaml("key: value\n")
        assert parsed == {"key": "value"}

    # The function turns the inline value after 'relevant line:' into a block scalar (the expected value keeps its trailing newline) and successfully parses the YAML string.
    def test_add_relevant_line(self):
        """A 'relevant line:' value that itself contains ': ' is returned as one string, newline included."""
        raw_response = "relevant line: value: 3\n"
        parsed = try_fix_yaml(raw_response)
        assert parsed == {"relevant line": "value: 3\n"}

    # The function extracts YAML snippet
    def test_extract_snippet(self):
        """YAML inside a ```yaml fence, preceded by a sentence of prose, is extracted and parsed."""
        # Spelled out line by line; the trailing empty entry yields the final newline.
        response_lines = [
            "Here is the answer in YAML format:",
            "",
            "```yaml",
            "name: John Smith",
            "age: 35",
            "```",
            "",
        ]
        fenced_response = "\n".join(response_lines)

        parsed = try_fix_yaml(fenced_response)
        assert parsed == {"name": "John Smith", "age": 35}


    # The YAML string is empty.
    def test_empty_yaml_fixed(self):
        """An empty response string produces None."""
        assert try_fix_yaml("") is None


    # The function extracts the YAML when the response has no opening code fence (only a closing one) and is surrounded by prose.
    def test_no_initial_yaml(self):
        review_text = '''\
I suggest the following:

code_suggestions:
- relevant_file: |
    src/index.ts
  label: |
    best practice

- relevant_file: |
    src/index2.ts
  label: |
    enhancement
```

We can further improve the code by using the `const` keyword instead of `var` in the `src/index.ts` file.
'''
        expected_output = {'code_suggestions': [{'relevant_file': 'src/index.ts\n', 'label': 'best practice\n'}, {'relevant_file': 'src/index2.ts\n', 'label': 'enhancement'}]}

        assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='label') == expected_output

    def test_with_initial_yaml(self):
        review_text = '''\
I suggest the following:

```
code_suggestions:
- relevant_file: |
    src/index.ts
  label: |
    best practice

- relevant_file: |
    src/index2.ts
  label: |
    enhancement
```

We can further improve the code by using the `const` keyword instead of `var` in the `src/index.ts` file.
'''
        expected_output = {'code_suggestions': [{'relevant_file': 'src/index.ts\n', 'label': 'best practice\n'}, {'relevant_file': 'src/index2.ts\n', 'label': 'enhancement'}]}
        assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='label') == expected_output


    def test_with_brackets_yaml_content(self):
        review_text = '''\
{
code_suggestions:
- relevant_file: |
    src/index.ts
  label: |
    best practice

- relevant_file: |
    src/index2.ts
  label: |
    enhancement
}
'''
        expected_output = {'code_suggestions': [{'relevant_file': 'src/index.ts\n', 'label': 'best practice\n'}, {'relevant_file': 'src/index2.ts\n', 'label': 'enhancement'}]}
        assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='label') == expected_output

    def test_tab_indent_yaml(self):
        review_text = '''\
code_suggestions:
- relevant_file: |
    src/index.ts
  label: |
\tbest practice

- relevant_file: |
    src/index2.ts
  label: |
    enhancement
'''
        expected_output = {'code_suggestions': [{'relevant_file': 'src/index.ts\n', 'label': 'best practice\n'}, {'relevant_file': 'src/index2.ts\n', 'label': 'enhancement\n'}]}
        assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='label') == expected_output


    def test_leading_plus_mark_code(self):
        review_text = '''\
code_suggestions:
- relevant_file: |
    src/index.ts
  label: |
    best practice
  existing_code: |
+   var router = createBrowserRouter([
  improved_code: |
+   const router = createBrowserRouter([
'''
        expected_output = {'code_suggestions': [{
            'relevant_file': 'src/index.ts\n',
            'label': 'best practice\n',
            'existing_code': 'var router = createBrowserRouter([\n',
            'improved_code': 'const router = createBrowserRouter([\n'
        }]}
        assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='improved_code') == expected_output


    def test_inconsistent_indentation_in_block_scalar_yaml(self):
        """
            This test case represents a situation where the AI outputs the opening '{' with 5 spaces
            (resulting in an inferred indent level of 5), while the closing '}' is output with only 4 spaces.
            This inconsistency makes it impossible for the YAML parser to automatically determine the correct
            indent level, causing a parsing failure.

            The root cause may be the LLM miscounting spaces or misunderstanding the active block scalar context
            while generating YAML output.
        """

        review_text = '''\
code_suggestions:
- relevant_file: |
    tsconfig.json
  existing_code: |
     {
        "key1": "value1",
        "key2": {
          "subkey": "value"
         }
    }
'''
        expected_json = '''\
 {
    "key1": "value1",
    "key2": {
      "subkey": "value"
     }
}
'''
        expected_output = {
            'code_suggestions': [{
                'relevant_file': 'tsconfig.json\n',
                'existing_code': expected_json
            }]
        }
        assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='existing_code') == expected_output


    def test_inconsistent_and_insufficient_indentation_in_block_scalar_yaml(self):
        """
            This test case reproduces a YAML parsing failure where the block scalar content
            generated by the AI includes inconsistent and insufficient indentation levels.

            The root cause may be the LLM miscounting spaces or misunderstanding the active block scalar context
            while generating YAML output.
        """

        review_text = '''\
code_suggestions:
- relevant_file: |
    tsconfig.json
  existing_code: |
    {
      "key1": "value1",
      "key2": {
        "subkey": "value"
      }
  }
'''
        expected_json = '''\
{
  "key1": "value1",
  "key2": {
    "subkey": "value"
  }
}
'''
        expected_output = {
            'code_suggestions': [{
                'relevant_file': 'tsconfig.json\n',
                'existing_code': expected_json
            }]
        }
        assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='existing_code') == expected_output


    def test_wrong_indentation_code_block_scalar(self):
        review_text = '''\
code_suggestions:
- relevant_file: |
    a.c
  existing_code: |
  int sum(int a, int b) {
    return a + b;
  }

  int sub(int a, int b) {
    return a - b;
  }
'''
        expected_code_block = '''\
int sum(int a, int b) {
  return a + b;
}

int sub(int a, int b) {
  return a - b;
}
'''
        expected_output = {'code_suggestions': [{'relevant_file': 'a.c\n', 'existing_code': '  int sum(int a, int b) {\n    return a + b;\n  }\n\n  int sub(int a, int b) {\n    return a - b;\n  }\n'}]}
        assert try_fix_yaml(review_text, first_key='code_suggestions', last_key='existing_code') == expected_output