diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index f38bb5e8..32a1965d 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -11,6 +11,7 @@ from enum import Enum from typing import Any, List, Tuple import yaml +from pydantic import BaseModel from starlette_context import context from pr_agent.algo import MAX_TOKENS @@ -19,6 +20,12 @@ from pr_agent.config_loader import get_settings, global_settings from pr_agent.algo.types import FilePatchInfo from pr_agent.log import get_logger +class Range(BaseModel): + line_start: int # should be 0-indexed + line_end: int + column_start: int = -1 + column_end: int = -1 + class ModelType(str, Enum): REGULAR = "regular" TURBO = "turbo" diff --git a/pr_agent/git_providers/azuredevops_provider.py b/pr_agent/git_providers/azuredevops_provider.py index 1ac153d4..4709c05a 100644 --- a/pr_agent/git_providers/azuredevops_provider.py +++ b/pr_agent/git_providers/azuredevops_provider.py @@ -130,6 +130,18 @@ class AzureDevopsProvider(GitProvider): def get_pr_description_full(self) -> str: return self.pr.description + def delete_comment(self, comment): + try: + self.azure_devops_client.delete_comment( + repository_id=self.repo_slug, + pull_request_id=self.pr_num, + thread_id=comment.thread_id, + comment_id=comment.id, + project=self.workspace_slug, + ) + except Exception as e: + get_logger().exception(f"Failed to delete comment, error: {e}") + def edit_comment(self, comment, body: str): try: self.azure_devops_client.update_comment( @@ -525,10 +537,18 @@ class AzureDevopsProvider(GitProvider): def get_user_id(self): return 0 + def get_issue_comments(self): - raise NotImplementedError( - "Azure DevOps provider does not support issue comments yet" - ) + threads = self.azure_devops_client.get_threads(repository_id=self.repo_slug, pull_request_id=self.pr_num, project=self.workspace_slug) + threads.reverse() + comment_list = [] + for thread in threads: + for comment in thread.comments: + if comment.content and comment not in comment_list: + comment.body = comment.content + comment.thread_id = thread.id + comment_list.append(comment) + return comment_list def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]: return True diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py index aef129a5..2e514ac8 100644 --- a/pr_agent/git_providers/git_provider.py +++ b/pr_agent/git_providers/git_provider.py @@ -3,10 +3,11 @@ from abc import ABC, abstractmethod # enum EDIT_TYPE (ADDED, DELETED, MODIFIED, RENAMED) from typing import Optional +from pr_agent.algo.utils import Range from pr_agent.config_loader import get_settings from pr_agent.algo.types import FilePatchInfo from pr_agent.log import get_logger - +MAX_FILES_ALLOWED_FULL = 50 class GitProvider(ABC): @abstractmethod @@ -51,6 +52,12 @@ class GitProvider(ABC): def edit_comment(self, comment, body: str): pass + def edit_comment_from_comment_id(self, comment_id: int, body: str): + pass + + def get_comment_body_from_comment_id(self, comment_id: int) -> str: + pass + def reply_to_comment_from_comment_id(self, comment_id: int, body: str): pass @@ -74,6 +81,7 @@ class GitProvider(ABC): # if the existing description wasn't generated by the pr-agent, just return it as-is if not self._is_generated_by_pr_agent(description_lowercase): get_logger().info(f"Existing description was not generated by the pr-agent") + self.user_description = description return description # if the existing description was generated by the pr-agent, but it doesn't contain a user description, @@ -120,12 +128,18 @@ class GitProvider(ABC): def get_repo_settings(self): pass + def get_workspace_name(self): + return "" + def get_pr_id(self): return "" def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str: return "" + def get_lines_link_original_file(self, filepath:str, component_range: Range) -> str: + return "" + #### comments operations #### @abstractmethod def publish_comment(self, pr_comment: str, is_temporary: bool = False): @@ -166,6 +180,7 @@ class GitProvider(ABC): pass self.publish_comment(pr_comment) + @abstractmethod def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str): pass @@ -193,6 +208,9 @@ class GitProvider(ABC): def get_comment_url(self, comment) -> str: return "" + def delete_comment(self, comment): + comment.delete() + #### labels operations #### @abstractmethod def publish_labels(self, labels): diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 638d79a1..3136ea7d 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -1,3 +1,4 @@ +import itertools import time import hashlib from datetime import datetime @@ -10,11 +11,11 @@ from starlette_context import context from ..algo.file_filter import filter_ignored from ..algo.language_handler import is_valid_file -from ..algo.utils import PRReviewHeader, load_large_diff, clip_tokens, find_line_number_of_relevant_line_in_file +from ..algo.utils import PRReviewHeader, load_large_diff, clip_tokens, find_line_number_of_relevant_line_in_file, Range from ..config_loader import get_settings from ..log import get_logger from ..servers.utils import RateLimitExceeded -from .git_provider import GitProvider, IncrementalPR +from .git_provider import GitProvider, IncrementalPR, MAX_FILES_ALLOWED_FULL from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo @@ -164,20 +165,36 @@ class GithubProvider(GitProvider): diff_files = [] invalid_files_names = [] + counter_valid = 0 for file in files: if not is_valid_file(file.filename): invalid_files_names.append(file.filename) continue - new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha) # communication with GitHub patch = file.patch + # allow only a limited number of files to be fully loaded. We can manage the rest with diffs only + counter_valid += 1 + avoid_load = False + if counter_valid >= MAX_FILES_ALLOWED_FULL and patch and not self.incremental.is_incremental: + avoid_load = True + if counter_valid == MAX_FILES_ALLOWED_FULL: + get_logger().info(f"Too many files in PR, will avoid loading full content for rest of files") + + if avoid_load: + new_file_content_str = "" + else: + new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha) # communication with GitHub + if self.incremental.is_incremental and self.unreviewed_files_set: original_file_content_str = self._get_pr_file_content(file, self.incremental.last_seen_commit_sha) patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str) self.unreviewed_files_set[file.filename] = patch else: - original_file_content_str = self._get_pr_file_content(file, self.pr.base.sha) + if avoid_load: + original_file_content_str = "" + else: + original_file_content_str = self._get_pr_file_content(file, self.pr.base.sha) if not patch: patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str) @@ -427,6 +444,16 @@ class GithubProvider(GitProvider): def edit_comment(self, comment, body: str): comment.edit(body=body) + def edit_comment_from_comment_id(self, comment_id: int, body: str): + try: + # self.pr.get_issue_comment(comment_id).edit(body) + headers, data_patch = self.pr._requester.requestJsonAndCheck( + "PATCH", f"{self.base_url}/repos/{self.repo}/issues/comments/{comment_id}", + input={"body": body} + ) + except Exception as e: + get_logger().exception(f"Failed to edit comment, error: {e}") + def reply_to_comment_from_comment_id(self, comment_id: int, body: str): try: # self.pr.get_issue_comment(comment_id).edit(body) @@ -437,6 +464,50 @@ class GithubProvider(GitProvider): except Exception as e: get_logger().exception(f"Failed to reply comment, error: {e}") + def get_comment_body_from_comment_id(self, comment_id: int): + try: + # self.pr.get_issue_comment(comment_id).edit(body) + headers, data_patch = self.pr._requester.requestJsonAndCheck( + "GET", f"{self.base_url}/repos/{self.repo}/issues/comments/{comment_id}" + ) + return data_patch.get("body","") + except Exception as e: + get_logger().exception(f"Failed to edit comment, error: {e}") + return None + + def publish_file_comments(self, file_comments: list) -> bool: + try: + headers, existing_comments = self.pr._requester.requestJsonAndCheck( + "GET", f"{self.pr.url}/comments" + ) + for comment in file_comments: + comment['commit_id'] = self.last_commit_id.sha + + found = False + for existing_comment in existing_comments: + comment['commit_id'] = self.last_commit_id.sha + our_app_name = get_settings().get("GITHUB.APP_NAME", "") + same_comment_creator = False + if self.deployment_type == 'app': + same_comment_creator = our_app_name.lower() in existing_comment['user']['login'].lower() + elif self.deployment_type == 'user': + same_comment_creator = self.github_user_id == existing_comment['user']['login'] + if existing_comment['subject_type'] == 'file' and comment['path'] == existing_comment['path'] and same_comment_creator: + headers, data_patch = self.pr._requester.requestJsonAndCheck( + "PATCH", f"{self.base_url}/repos/{self.repo}/pulls/comments/{existing_comment['id']}", input={"body":comment['body']} + ) + found = True + break + if not found: + headers, data_post = self.pr._requester.requestJsonAndCheck( + "POST", f"{self.pr.url}/comments", input=comment + ) + return True + except Exception as e: + if get_settings().config.verbosity_level >= 2: + get_logger().error(f"Failed to publish diffview file summary, error: {e}") + return False + def remove_initial_comment(self): try: for comment in getattr(self.pr, 'comments_list', []): @@ -461,6 +532,11 @@ class GithubProvider(GitProvider): def get_pr_branch(self): return self.pr.head.ref + def get_pr_owner_id(self) -> str | None: + if not self.repo: + return None + return self.repo.split('/')[0] + def get_pr_description_full(self): return self.pr.body @@ -495,6 +571,9 @@ class GithubProvider(GitProvider): except Exception: return "" + def get_workspace_name(self): + return self.repo.split('/')[0] + def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]: if disable_eyes: return None @@ -673,7 +752,7 @@ class GithubProvider(GitProvider): def get_repo_labels(self): labels = self.repo_obj.get_labels() - return [label for label in labels] + return [label for label in itertools.islice(labels, 50)] def get_commit_messages(self): """ @@ -728,6 +807,29 @@ class GithubProvider(GitProvider): link = f"{self.base_url_html}/{self.repo}/pull/{self.pr_num}/files#diff-{sha_file}R{relevant_line_start}" return link + def get_lines_link_original_file(self, filepath: str, component_range: Range) -> str: + """ + Returns the link to the original file on GitHub that corresponds to the given filepath and component range. + + Args: + filepath (str): The path of the file. + component_range (Range): The range of lines that represent the component. + + Returns: + str: The link to the original file on GitHub. + + Example: + >>> filepath = "path/to/file.py" + >>> component_range = Range(line_start=10, line_end=20) + >>> link = get_lines_link_original_file(filepath, component_range) + >>> print(link) + "https://github.com/{repo}/blob/{commit_sha}/{filepath}/#L11-L21" + """ + line_start = component_range.line_start + 1 + line_end = component_range.line_end + 1 + link = (f"https://github.com/{self.repo}/blob/{self.last_commit_id.sha}/{filepath}/" + f"#L{line_start}-L{line_end}") + return link def get_pr_id(self): try: @@ -747,4 +849,4 @@ class GithubProvider(GitProvider): return False def calc_pr_statistics(self, pull_request_data: dict): - return {} + return {} diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index 0461535b..d414f9d3 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -4,13 +4,14 @@ from typing import Optional, Tuple from urllib.parse import urlparse import gitlab +import requests from gitlab import GitlabGetError from ..algo.file_filter import filter_ignored from ..algo.language_handler import is_valid_file from ..algo.utils import load_large_diff, clip_tokens, find_line_number_of_relevant_line_in_file from ..config_loader import get_settings -from .git_provider import GitProvider +from .git_provider import GitProvider, MAX_FILES_ALLOWED_FULL from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo from ..log import get_logger @@ -45,7 +46,8 @@ class GitLabProvider(GitProvider): self.incremental = incremental def is_supported(self, capability: str) -> bool: - if capability in ['get_issue_comments', 'create_inline_comment', 'publish_inline_comments']: # gfm_markdown is supported in gitlab ! + if capability in ['get_issue_comments', 'create_inline_comment', 'publish_inline_comments', + 'publish_file_comments']: # gfm_markdown is supported in gitlab ! return False return True @@ -101,13 +103,23 @@ class GitLabProvider(GitProvider): diff_files = [] invalid_files_names = [] + counter_valid = 0 for diff in diffs: if not is_valid_file(diff['new_path']): invalid_files_names.append(diff['new_path']) continue - original_file_content_str = self.get_pr_file_content(diff['old_path'], self.mr.diff_refs['base_sha']) - new_file_content_str = self.get_pr_file_content(diff['new_path'], self.mr.diff_refs['head_sha']) + # allow only a limited number of files to be fully loaded. We can manage the rest with diffs only + counter_valid += 1 + if counter_valid < MAX_FILES_ALLOWED_FULL or not diff['diff']: + original_file_content_str = self.get_pr_file_content(diff['old_path'], self.mr.diff_refs['base_sha']) + new_file_content_str = self.get_pr_file_content(diff['new_path'], self.mr.diff_refs['head_sha']) + else: + if counter_valid == MAX_FILES_ALLOWED_FULL: + get_logger().info(f"Too many files in PR, will avoid loading full content for rest of files") + original_file_content_str = '' + new_file_content_str = '' + try: if isinstance(original_file_content_str, bytes): original_file_content_str = bytes.decode(original_file_content_str, 'utf-8') @@ -206,11 +218,11 @@ class GitLabProvider(GitProvider): raise NotImplementedError("Gitlab provider does not support publishing inline comments yet") def get_comment_body_from_comment_id(self, comment_id: int): - comment = self.mr.notes.get(comment_id) + comment = self.mr.notes.get(comment_id).body return comment def send_inline_comment(self,body: str,edit_type: str,found: bool,relevant_file: str,relevant_line_in_file: int, - source_line_no: int, target_file: str,target_line_no: int) -> None: + source_line_no: int, target_file: str,target_line_no: int, original_suggestion) -> None: if not found: get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}") else: @@ -230,12 +242,46 @@ class GitLabProvider(GitProvider): else: pos_obj['new_line'] = target_line_no - 1 pos_obj['old_line'] = source_line_no - 1 - get_logger().debug(f"Creating comment in {self.id_mr} with body {body} and position {pos_obj}") + get_logger().debug(f"Creating comment in MR {self.id_mr} with body {body} and position {pos_obj}") try: self.mr.discussions.create({'body': body, 'position': pos_obj}) except Exception as e: - get_logger().debug( - f"Failed to create comment in {self.id_mr} with position {pos_obj} (probably not a '+' line)") + try: + # fallback - create a general note on the file in the MR + line_start = original_suggestion['suggestion_orig_location']['start_line'] + line_end = original_suggestion['suggestion_orig_location']['end_line'] + old_code_snippet = original_suggestion['prev_code_snippet'] + new_code_snippet = original_suggestion['new_code_snippet'] + content = original_suggestion['suggestion_summary'] + label = original_suggestion['category'] + score = original_suggestion['score'] + + if hasattr(self, 'main_language'): + language = self.main_language + else: + language = '' + link = self.get_line_link(relevant_file, line_start, line_end) + body_fallback =f"**Suggestion:** {content} [{label}, importance: {score}]\n___\n" + body_fallback +=f"\n\nReplace lines ([{line_start}-{line_end}]({link}))\n\n```{language}\n{old_code_snippet}\n````\n\n" + body_fallback +=f"with\n\n```{language}\n{new_code_snippet}\n````" + body_fallback += f"\n\n___\n\n`(Cannot implement this suggestion directly, as gitlab API does not enable committing to a non -+ line in a PR)`" + + # Create a general note on the file in the MR + self.mr.notes.create({ + 'body': body_fallback, + 'position': { + 'base_sha': diff.base_commit_sha, + 'start_sha': diff.start_commit_sha, + 'head_sha': diff.head_commit_sha, + 'position_type': 'text', + 'file_path': f'{target_file.filename}', + } + }) + + # get_logger().debug( + # f"Failed to create comment in MR {self.id_mr} with position {pos_obj} (probably not a '+' line)") + except Exception as e: + get_logger().exception(f"Failed to create comment in MR {self.id_mr} with position {pos_obj}: {e}") def get_relevant_diff(self, relevant_file: str, relevant_line_in_file: int) -> Optional[dict]: changes = self.mr.changes() # Retrieve the changes for the merge request once @@ -257,6 +303,7 @@ class GitLabProvider(GitProvider): def publish_code_suggestions(self, code_suggestions: list) -> bool: for suggestion in code_suggestions: try: + original_suggestion = suggestion['original_suggestion'] body = suggestion['body'] relevant_file = suggestion['relevant_file'] relevant_lines_start = suggestion['relevant_lines_start'] @@ -283,13 +330,16 @@ class GitLabProvider(GitProvider): edit_type = 'addition' self.send_inline_comment(body, edit_type, found, relevant_file, relevant_line_in_file, source_line_no, - target_file, target_line_no) + target_file, target_line_no, original_suggestion) except Exception as e: get_logger().exception(f"Could not publish code suggestion:\nsuggestion: {suggestion}\nerror: {e}") # note that we publish suggestions one-by-one. so, if one fails, the rest will still be published return True + def publish_file_comments(self, file_comments: list) -> bool: + pass + def search_line(self, relevant_file, relevant_line_in_file): target_file = None @@ -380,6 +430,9 @@ class GitLabProvider(GitProvider): except Exception: return "" + def get_workspace_name(self): + return self.id_project.split('/')[0] + def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]: return True diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py index c028b457..ddad36c6 100644 --- a/pr_agent/tools/pr_code_suggestions.py +++ b/pr_agent/tools/pr_code_suggestions.py @@ -11,7 +11,8 @@ from pr_agent.algo.pr_processing import get_pr_diff, get_pr_multi_diffs, retry_w from pr_agent.algo.token_handler import TokenHandler from pr_agent.algo.utils import load_yaml, replace_code_tags, ModelType, show_relevant_configurations from pr_agent.config_loader import get_settings -from pr_agent.git_providers import get_git_provider, get_git_provider_with_context, GithubProvider, GitLabProvider +from pr_agent.git_providers import get_git_provider, get_git_provider_with_context, GithubProvider, GitLabProvider, \ + AzureDevopsProvider from pr_agent.git_providers.git_provider import get_main_pr_language from pr_agent.log import get_logger from pr_agent.servers.help import HelpMessage @@ -176,6 +177,13 @@ class PRCodeSuggestions: final_update_message=True, max_previous_comments=4, progress_response=None): + if isinstance(self.git_provider, AzureDevopsProvider): # get_latest_commit_url is not supported yet + if progress_response: + self.git_provider.edit_comment(progress_response, pr_comment) + else: + self.git_provider.publish_comment(pr_comment) + return + history_header = f"#### Previous suggestions\n" last_commit_num = self.git_provider.get_latest_commit_url().split('/')[-1][:7] latest_suggestion_header = f"Latest suggestions up to {last_commit_num}" @@ -248,7 +256,7 @@ class PRCodeSuggestions: get_logger().info(f"Persistent mode - updating comment {comment_url} to latest {name} message") if progress_response: # publish to 'progress_response' comment, because it refreshes immediately self.git_provider.edit_comment(progress_response, pr_comment_updated) - comment.delete() + self.git_provider.delete_comment(comment) else: self.git_provider.edit_comment(comment, pr_comment_updated) return @@ -424,7 +432,8 @@ class PRCodeSuggestions: body = f"**Suggestion:** {content} [{label}]\n```suggestion\n" + new_code_snippet + "\n```" code_suggestions.append({'body': body, 'relevant_file': relevant_file, 'relevant_lines_start': relevant_lines_start, - 'relevant_lines_end': relevant_lines_end}) + 'relevant_lines_end': relevant_lines_end, + 'original_suggestion': d}) except Exception: get_logger().info(f"Could not parse suggestion: {d}")