Fixed conflicts

Pinyoo Thotaboot
2025-05-22 14:54:26 +07:00
33 changed files with 1246 additions and 244 deletions

View File

@@ -58,6 +58,7 @@ MAX_TOKENS = {
'vertex_ai/claude-3-7-sonnet@20250219': 200000,
'vertex_ai/gemini-1.5-pro': 1048576,
'vertex_ai/gemini-2.5-pro-preview-03-25': 1048576,
'vertex_ai/gemini-2.5-pro-preview-05-06': 1048576,
'vertex_ai/gemini-1.5-flash': 1048576,
'vertex_ai/gemini-2.0-flash': 1048576,
'vertex_ai/gemini-2.5-flash-preview-04-17': 1048576,
@@ -66,6 +67,7 @@ MAX_TOKENS = {
'gemini/gemini-1.5-flash': 1048576,
'gemini/gemini-2.0-flash': 1048576,
'gemini/gemini-2.5-pro-preview-03-25': 1048576,
'gemini/gemini-2.5-pro-preview-05-06': 1048576,
'codechat-bison': 6144,
'codechat-bison-32k': 32000,
'anthropic.claude-instant-v1': 100000,
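The two preview-model entries added above extend the MAX_TOKENS registry. A minimal sketch of how such a table is typically consulted (the helper name and fallback value are hypothetical, not pr-agent's actual API):

```python
# Hypothetical helper: resolve a model's context window from MAX_TOKENS,
# falling back to a conservative default for unlisted models.
DEFAULT_LIMIT = 8192  # assumed fallback, not from the source

def get_model_limit(model: str) -> int:
    return MAX_TOKENS.get(model, DEFAULT_LIMIT)

assert get_model_limit('gemini/gemini-2.5-pro-preview-05-06') == 1048576
```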

View File

@@ -59,6 +59,7 @@ class LiteLLMAIHandler(BaseAiHandler):
litellm.api_version = get_settings().openai.api_version
if get_settings().get("OPENAI.API_BASE", None):
litellm.api_base = get_settings().openai.api_base
self.api_base = get_settings().openai.api_base
if get_settings().get("ANTHROPIC.KEY", None):
litellm.anthropic_key = get_settings().anthropic.key
if get_settings().get("COHERE.KEY", None):
@@ -370,12 +371,12 @@ class LiteLLMAIHandler(BaseAiHandler):
get_logger().info(f"\nUser prompt:\n{user}")
response = await acompletion(**kwargs)
except (openai.APIError, openai.APITimeoutError) as e:
get_logger().warning(f"Error during LLM inference: {e}")
raise
except (openai.RateLimitError) as e:
get_logger().error(f"Rate limit error during LLM inference: {e}")
raise
except (openai.APIError, openai.APITimeoutError) as e:
get_logger().warning(f"Error during LLM inference: {e}")
raise
except (Exception) as e:
get_logger().warning(f"Unknown error during LLM inference: {e}")
raise openai.APIError from e
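The handler reordering in this hunk matters because of the openai v1 exception hierarchy: RateLimitError derives (via APIStatusError) from APIError, so an `except openai.APIError` clause placed first would also swallow rate-limit errors and the dedicated handler would never run. A quick check:

```python
import openai

# RateLimitError -> APIStatusError -> APIError, and
# APITimeoutError -> APIConnectionError -> APIError in the v1 client,
# so the most specific except clause has to come first.
print(issubclass(openai.RateLimitError, openai.APIError))   # True
print(issubclass(openai.APITimeoutError, openai.APIError))  # True
```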

View File

@@ -731,8 +731,9 @@ def try_fix_yaml(response_text: str,
response_text_original="") -> dict:
response_text_lines = response_text.split('\n')
keys_yaml = ['relevant line:', 'suggestion content:', 'relevant file:', 'existing code:', 'improved code:']
keys_yaml = ['relevant line:', 'suggestion content:', 'relevant file:', 'existing code:', 'improved code:', 'label:']
keys_yaml = keys_yaml + keys_fix_yaml
# first fallback - try to convert 'relevant line: ...' to 'relevant line: |-\n ...'
response_text_lines_copy = response_text_lines.copy()
for i in range(0, len(response_text_lines_copy)):
@@ -747,8 +748,29 @@ def try_fix_yaml(response_text: str,
except:
pass
# second fallback - try to extract only range from first ```yaml to ````
snippet_pattern = r'```(yaml)?[\s\S]*?```'
# 1.5 fallback - try to convert '|' to '|2'. Will solve cases where the indentation decreases inside the code block
response_text_copy = copy.deepcopy(response_text)
response_text_copy = response_text_copy.replace('|\n', '|2\n')
try:
data = yaml.safe_load(response_text_copy)
get_logger().info(f"Successfully parsed AI prediction after replacing | with |2")
return data
except:
# if it fails, we can try to add spaces to the lines that are not indented properly, and contain '}'.
response_text_lines_copy = response_text_copy.split('\n')
for i in range(0, len(response_text_lines_copy)):
initial_space = len(response_text_lines_copy[i]) - len(response_text_lines_copy[i].lstrip())
if initial_space == 2 and '|2' not in response_text_lines_copy[i] and '}' in response_text_lines_copy[i]:
response_text_lines_copy[i] = ' ' + response_text_lines_copy[i].lstrip()
try:
data = yaml.safe_load('\n'.join(response_text_lines_copy))
get_logger().info(f"Successfully parsed AI prediction after replacing | with |2 and adding spaces")
return data
except:
pass
# second fallback - try to extract only the range from the first ```yaml to the last ```
snippet_pattern = r'```yaml([\s\S]*?)```(?=\s*$|")'
snippet = re.search(snippet_pattern, '\n'.join(response_text_lines_copy))
if not snippet:
snippet = re.search(snippet_pattern, response_text_original) # before we removed the "```"
@@ -803,16 +825,47 @@ def try_fix_yaml(response_text: str,
except:
pass
# sixth fallback - try to remove last lines
for i in range(1, len(response_text_lines)):
response_text_lines_tmp = '\n'.join(response_text_lines[:-i])
# sixth fallback - replace tabs with spaces
if '\t' in response_text:
response_text_copy = copy.deepcopy(response_text)
response_text_copy = response_text_copy.replace('\t', ' ')
try:
data = yaml.safe_load(response_text_lines_tmp)
get_logger().info(f"Successfully parsed AI prediction after removing {i} lines")
data = yaml.safe_load(response_text_copy)
get_logger().info(f"Successfully parsed AI prediction after replacing tabs with spaces")
return data
except:
pass
# seventh fallback - add indent for sections of code blocks
response_text_copy = copy.deepcopy(response_text)
response_text_copy_lines = response_text_copy.split('\n')
start_line = -1
for i, line in enumerate(response_text_copy_lines):
if 'existing_code:' in line or 'improved_code:' in line:
start_line = i
elif line.endswith(': |') or line.endswith(': |-') or line.endswith(': |2') or line.endswith(':'):
start_line = -1
elif start_line != -1:
response_text_copy_lines[i] = ' ' + line
response_text_copy = '\n'.join(response_text_copy_lines)
try:
data = yaml.safe_load(response_text_copy)
get_logger().info(f"Successfully parsed AI prediction after adding indent for sections of code blocks")
return data
except:
pass
# # sixth fallback - try to remove last lines
# for i in range(1, len(response_text_lines)):
# response_text_lines_tmp = '\n'.join(response_text_lines[:-i])
# try:
# data = yaml.safe_load(response_text_lines_tmp)
# get_logger().info(f"Successfully parsed AI prediction after removing {i} lines")
# return data
# except:
# pass
def set_custom_labels(variables, git_provider=None):
if not get_settings().config.enable_custom_labels:
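The '|' to '|2' fallback above relies on YAML's explicit indentation indicator: with a plain '|', the parser auto-detects the block scalar's indentation from its first content line, so any later, less-indented line terminates the scalar and breaks parsing; '|2' pins the indentation at two spaces instead. A small reproduction with illustrative input (not actual model output):

```python
import yaml

# First content line is indented 8 spaces, so '|' auto-detects an 8-space
# block indent and the 4-space line below ends the scalar early.
broken = "improved_code: |\n        def f():\n    return 1\n"
try:
    yaml.safe_load(broken)
except yaml.YAMLError as e:
    print("plain '|' fails:", type(e).__name__)

# '|2' fixes the block indent at 2 spaces, so both lines stay inside it.
fixed = broken.replace('|\n', '|2\n')
print(yaml.safe_load(fixed)['improved_code'])
```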

View File

@@ -12,6 +12,7 @@ from pr_agent.git_providers.gitea_provider import GiteaProvider
from pr_agent.git_providers.github_provider import GithubProvider
from pr_agent.git_providers.gitlab_provider import GitLabProvider
from pr_agent.git_providers.local_git_provider import LocalGitProvider
from pr_agent.git_providers.gitea_provider import GiteaProvider
_GIT_PROVIDERS = {
'github': GithubProvider,
@@ -22,7 +23,7 @@ _GIT_PROVIDERS = {
'codecommit': CodeCommitProvider,
'local': LocalGitProvider,
'gerrit': GerritProvider,
'gitea': GiteaProvider
'gitea': GiteaProvider,
}
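The trailing comma added above is cosmetic; the dict itself is the registry that maps a configured provider name to its class. A sketch of the lookup pattern (the error message is illustrative; pr-agent's real resolution also handles URLs and context):

```python
def _get_provider_class(provider_name: str):
    # Plain registry lookup; unknown names surface as a configuration error.
    try:
        return _GIT_PROVIDERS[provider_name]
    except KeyError:
        raise ValueError(f"Unknown git provider: {provider_name}")
```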

View File

@@ -618,7 +618,7 @@ class AzureDevopsProvider(GitProvider):
return pr_id
except Exception as e:
if get_settings().config.verbosity_level >= 2:
get_logger().info(f"Failed to get pr id, error: {e}")
get_logger().info(f"Failed to get PR id, error: {e}")
return ""
def publish_file_comments(self, file_comments: list) -> bool:

View File

@@ -96,7 +96,7 @@ class GithubProvider(GitProvider):
parsed_url = urlparse(given_url)
repo_path = (parsed_url.path.split('.git')[0])[1:] # /<owner>/<repo>.git -> <owner>/<repo>
if not repo_path:
get_logger().error(f"url is neither an issues url nor a pr url nor a valid git url: {given_url}. Returning empty result.")
get_logger().error(f"url is neither an issues url nor a PR url nor a valid git url: {given_url}. Returning empty result.")
return ""
return repo_path
except Exception as e:

View File

@@ -6,8 +6,7 @@ from dynaconf import Dynaconf
from starlette_context import context
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import (get_git_provider,
get_git_provider_with_context)
from pr_agent.git_providers import get_git_provider_with_context
from pr_agent.log import get_logger

View File

@@ -10,7 +10,7 @@ class Eligibility(Enum):
class IdentityProvider(ABC):
@abstractmethod
def verify_eligibility(self, git_provider, git_provier_id, pr_url):
def verify_eligibility(self, git_provider, git_provider_id, pr_url):
pass
@abstractmethod

View File

@@ -127,6 +127,14 @@ def should_process_pr_logic(data) -> bool:
source_branch = pr_data.get("source", {}).get("branch", {}).get("name", "")
target_branch = pr_data.get("destination", {}).get("branch", {}).get("name", "")
sender = _get_username(data)
repo_full_name = pr_data.get("destination", {}).get("repository", {}).get("full_name", "")
# logic to ignore PRs from specific repositories
ignore_repos = get_settings().get("CONFIG.IGNORE_REPOSITORIES", [])
if repo_full_name and ignore_repos:
if any(re.search(regex, repo_full_name) for regex in ignore_repos):
get_logger().info(f"Ignoring PR from repository '{repo_full_name}' due to 'config.ignore_repositories' setting")
return False
# logic to ignore PRs from specific users
ignore_pr_users = get_settings().get("CONFIG.IGNORE_PR_AUTHORS", [])
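The same repository filter appears in the Bitbucket handler here and in the GitHub and GitLab handlers below; the check itself is a plain re.search of each configured pattern against the repository's full name. A standalone sketch with hypothetical settings values:

```python
import re

# Hypothetical patterns; in pr-agent these come from CONFIG.IGNORE_REPOSITORIES.
ignore_repos = [r"^internal/.*", r".*-archive$"]

def should_skip(repo_full_name: str) -> bool:
    return bool(repo_full_name) and any(
        re.search(regex, repo_full_name) for regex in ignore_repos
    )

print(should_skip("internal/tools"))  # True
print(should_skip("acme/website"))    # False
```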

View File

@@ -258,6 +258,14 @@ def should_process_pr_logic(body) -> bool:
source_branch = pull_request.get("head", {}).get("ref", "")
target_branch = pull_request.get("base", {}).get("ref", "")
sender = body.get("sender", {}).get("login")
repo_full_name = body.get("repository", {}).get("full_name", "")
# logic to ignore PRs from specific repositories
ignore_repos = get_settings().get("CONFIG.IGNORE_REPOSITORIES", [])
if ignore_repos and repo_full_name:
if any(re.search(regex, repo_full_name) for regex in ignore_repos):
get_logger().info(f"Ignoring PR from repository '{repo_full_name}' due to 'config.ignore_repositories' setting")
return False
# logic to ignore PRs from specific users
ignore_pr_users = get_settings().get("CONFIG.IGNORE_PR_AUTHORS", [])

View File

@@ -113,6 +113,14 @@ def should_process_pr_logic(data) -> bool:
return False
title = data['object_attributes'].get('title')
sender = data.get("user", {}).get("username", "")
repo_full_name = data.get('project', {}).get('path_with_namespace', "")
# logic to ignore PRs from specific repositories
ignore_repos = get_settings().get("CONFIG.IGNORE_REPOSITORIES", [])
if ignore_repos and repo_full_name:
if any(re.search(regex, repo_full_name) for regex in ignore_repos):
get_logger().info(f"Ignoring MR from repository '{repo_full_name}' due to 'config.ignore_repositories' setting")
return False
# logic to ignore PRs from specific users
ignore_pr_users = get_settings().get("CONFIG.IGNORE_PR_AUTHORS", [])

View File

@@ -55,6 +55,7 @@ ignore_pr_target_branches = [] # a list of regular expressions of target branche
ignore_pr_source_branches = [] # a list of regular expressions of source branches to ignore from PR agent when a PR is created
ignore_pr_labels = [] # labels to ignore from PR agent when a PR is created
ignore_pr_authors = [] # authors to ignore from PR agent when a PR is created
ignore_repositories = [] # a list of regular expressions of repository full names (e.g. "org/repo") to ignore from PR agent processing
#
is_auto_command = false # will be auto-set to true if the command is triggered by an automation
enable_ai_metadata = false # will enable adding ai metadata
@@ -80,6 +81,7 @@ require_ticket_analysis_review=true
# general options
persistent_comment=true
extra_instructions = ""
num_max_findings = 3
final_update_message = true
# review labels
enable_review_labels_security=true
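Both new keys surface in code through pr-agent's Dynaconf-backed get_settings(); the two accessor styles below are the ones used elsewhere in this commit:

```python
# .get(...) with a default, as in the webhook handlers:
ignore_repos = get_settings().get("CONFIG.IGNORE_REPOSITORIES", [])
# dotted attribute access, as in PRReviewer:
num_max_findings = get_settings().pr_reviewer.num_max_findings
```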

View File

@@ -98,7 +98,7 @@ class Review(BaseModel):
{%- if question_str %}
insights_from_user_answers: str = Field(description="briefly summarize the insights you gained from the user's answers to the questions")
{%- endif %}
key_issues_to_review: List[KeyIssuesComponentLink] = Field("A short and diverse list (0-3 issues) of high-priority bugs, problems or performance concerns introduced in the PR code, which the PR reviewer should further focus on and validate during the review process.")
key_issues_to_review: List[KeyIssuesComponentLink] = Field("A short and diverse list (0-{{ num_max_findings }} issues) of high-priority bugs, problems or performance concerns introduced in the PR code, which the PR reviewer should further focus on and validate during the review process.")
{%- if require_security_review %}
security_concerns: str = Field(description="Does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' (without explaining why) if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible")
{%- endif %}
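The hardcoded 0-3 above becomes a template variable, so the cap on key_issues_to_review now follows the num_max_findings setting. A sketch of the substitution (pr-agent renders its prompts with Jinja templates; this only illustrates the variable expansion):

```python
from jinja2 import Template

schema_line = Template(
    "A short and diverse list (0-{{ num_max_findings }} issues) of "
    "high-priority bugs, problems or performance concerns."
)
print(schema_line.render(num_max_findings=3))  # ... (0-3 issues) ...
```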

View File

@@ -199,7 +199,7 @@ class PRDescription:
async def _prepare_prediction(self, model: str) -> None:
if get_settings().pr_description.use_description_markers and 'pr_agent:' not in self.user_description:
get_logger().info("Markers were enabled, but user description does not contain markers. skipping AI prediction")
get_logger().info("Markers were enabled, but user description does not contain markers. Skipping AI prediction")
return None
large_pr_handling = get_settings().pr_description.enable_large_pr_handling and "pr_description_only_files_prompts" in get_settings()
@@ -707,7 +707,7 @@ class PRDescription:
pr_body += """</tr></tbody></table>"""
except Exception as e:
get_logger().error(f"Error processing pr files to markdown {self.pr_id}: {str(e)}")
get_logger().error(f"Error processing PR files to markdown {self.pr_id}: {str(e)}")
pass
return pr_body, pr_comments

View File

@@ -81,6 +81,7 @@ class PRReviewer:
"language": self.main_language,
"diff": "", # empty diff for initial calculation
"num_pr_files": self.git_provider.get_num_of_files(),
"num_max_findings": get_settings().pr_reviewer.num_max_findings,
"require_score": get_settings().pr_reviewer.require_score_review,
"require_tests": get_settings().pr_reviewer.require_tests_review,
"require_estimate_effort_to_review": get_settings().pr_reviewer.require_estimate_effort_to_review,
@@ -316,7 +317,9 @@ class PRReviewer:
get_logger().exception(f"Failed to remove previous review comment, error: {e}")
def _can_run_incremental_review(self) -> bool:
"""Checks if we can run incremental review according the various configurations and previous review"""
"""
Checks if we can run an incremental review according to the various configurations and previous review.
"""
# checking if running in auto mode but there are no new commits
if self.is_auto and not self.incremental.first_new_commit_sha:
get_logger().info(f"Incremental review is enabled for {self.pr_url} but there are no new commits")