diff --git a/pr_agent/agent/pr_agent.py b/pr_agent/agent/pr_agent.py index 4b7af70a..66b844d7 100644 --- a/pr_agent/agent/pr_agent.py +++ b/pr_agent/agent/pr_agent.py @@ -16,10 +16,16 @@ class PRAgent: if any(cmd in request for cmd in ["/answer"]): await PRReviewer(pr_url, is_answer=True).review() elif any(cmd in request for cmd in ["/review", "/review_pr", "/reflect_and_review"]): + words = request.split(" ") + incremental_review = False + if len(words) > 1: + arg = words[1] + if arg == "-i": + incremental_review = True if settings.pr_reviewer.ask_and_reflect or "/reflect_and_review" in request: await PRInformationFromUser(pr_url).generate_questions() else: - await PRReviewer(pr_url).review() + await PRReviewer(pr_url, is_incremental=incremental_review).review() elif any(cmd in request for cmd in ["/describe", "/describe_pr"]): await PRDescription(pr_url).describe() elif any(cmd in request for cmd in ["/improve", "/improve_code"]): diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 165b7de5..11f16449 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -1,14 +1,15 @@ from __future__ import annotations -import difflib import logging -from typing import Any, Tuple, Union +from typing import Tuple, Union from pr_agent.algo.git_patch_processing import convert_to_hunks_with_lines_numbers, extend_patch, handle_patch_deletions from pr_agent.algo.language_handler import sort_files_by_main_languages from pr_agent.algo.token_handler import TokenHandler +from pr_agent.algo.utils import load_large_diff from pr_agent.config_loader import settings -from pr_agent.git_providers import GithubProvider +from pr_agent.git_providers.git_provider import GitProvider + DELETED_FILES_ = "Deleted files:\n" @@ -19,7 +20,7 @@ OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600 PATCH_EXTRA_LINES = 3 -def get_pr_diff(git_provider: Union[GithubProvider, Any], token_handler: TokenHandler, +def get_pr_diff(git_provider: Union[GitProvider], 
token_handler: TokenHandler, add_line_numbers_to_hunks: bool = False, disable_extra_lines: bool =False) -> str: """ Returns a string with the diff of the PR. @@ -163,14 +164,3 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, return patches, modified_files_list, deleted_files_list -def load_large_diff(file, new_file_content_str: str, original_file_content_str: str, patch: str) -> str: - if not patch: # to Do - also add condition for file extension - try: - diff = difflib.unified_diff(original_file_content_str.splitlines(keepends=True), - new_file_content_str.splitlines(keepends=True)) - if settings.config.verbosity_level >= 2: - logging.warning(f"File was modified, but no patch was found. Manually creating patch: {file.filename}.") - patch = ''.join(diff) - except Exception: - pass - return patch diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index f813b8cd..1d85b1d6 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -1,10 +1,14 @@ from __future__ import annotations +import difflib +from datetime import datetime import json import logging import re import textwrap +from pr_agent.config_loader import settings + def convert_to_markdown(output_data: dict) -> str: markdown_text = "" @@ -102,3 +106,21 @@ def fix_json_escape_char(json_message=None): new_message = ''.join(json_message) return fix_json_escape_char(json_message=new_message) return result + + +def convert_str_to_datetime(date_str): + datetime_format = '%a, %d %b %Y %H:%M:%S %Z' + return datetime.strptime(date_str, datetime_format) + + +def load_large_diff(file, new_file_content_str: str, original_file_content_str: str, patch: str) -> str: + if not patch: # to Do - also add condition for file extension + try: + diff = difflib.unified_diff(original_file_content_str.splitlines(keepends=True), + new_file_content_str.splitlines(keepends=True)) + if settings.config.verbosity_level >= 2: + logging.warning(f"File was modified, but no patch was found. 
Manually creating patch: {file.filename}.") + patch = ''.join(diff) + except Exception: + pass + return patch diff --git a/pr_agent/cli.py b/pr_agent/cli.py index ca9d5db0..acb331a1 100644 --- a/pr_agent/cli.py +++ b/pr_agent/cli.py @@ -57,7 +57,11 @@ reflect - Ask the PR author questions about the PR. asyncio.run(reviewer.suggest()) elif command in ['review', 'review_pr']: print(f"Reviewing PR: {args.pr_url}") - reviewer = PRReviewer(args.pr_url, cli_mode=True) + incremental_review = False + if len(args.rest) > 0: + incremental_review = args.rest[0].startswith("-i") + + reviewer = PRReviewer(args.pr_url, cli_mode=True, is_incremental=incremental_review) asyncio.run(reviewer.review()) elif command in ['reflect']: print(f"Asking the PR author questions: {args.pr_url}") diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py index bb7b2c1d..1f3f9d6e 100644 --- a/pr_agent/git_providers/bitbucket_provider.py +++ b/pr_agent/git_providers/bitbucket_provider.py @@ -11,7 +11,7 @@ from .git_provider import FilePatchInfo class BitbucketProvider: - def __init__(self, pr_url: Optional[str] = None): + def __init__(self, pr_url: Optional[str] = None, incremental: Optional[bool] = False): s = requests.Session() s.headers['Authorization'] = f'Bearer {settings.get("BITBUCKET.BEARER_TOKEN", None)}' self.bitbucket_client = Cloud(session=s) @@ -22,6 +22,7 @@ class BitbucketProvider: self.pr_num = None self.pr = None self.temp_comments = [] + self.incremental = incremental if pr_url: self.set_pr(pr_url) diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 9651cfda..c125b2d3 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -9,10 +9,11 @@ from pr_agent.config_loader import settings from .git_provider import FilePatchInfo, GitProvider from ..algo.language_handler import is_valid_file +from ..algo.utils import load_large_diff class 
GithubProvider(GitProvider): - def __init__(self, pr_url: Optional[str] = None): + def __init__(self, pr_url: Optional[str] = None, incremental: Optional[bool] = False): self.installation_id = settings.get("GITHUB.INSTALLATION_ID") self.github_client = self._get_github_client() self.repo = None @@ -20,6 +21,7 @@ class GithubProvider(GitProvider): self.pr = None self.github_user_id = None self.diff_files = None + self.incremental = incremental if pr_url: self.set_pr(pr_url) self.last_commit_id = list(self.pr.get_commits())[-1] @@ -30,18 +32,60 @@ class GithubProvider(GitProvider): def set_pr(self, pr_url: str): self.repo, self.pr_num = self._parse_pr_url(pr_url) self.pr = self._get_pr() + if self.incremental: + self.commits = list(self.pr.get_commits()) + self.comments = list(self.pr.get_issue_comments()) + self.previous_review = None + self.first_new_commit_sha = None + self.incremental_files = None + + for index in range(len(self.comments) - 1, -1, -1): + if self.comments[index].user.login == "github-actions[bot]" or \ + self.comments[index].user.login == "CodiumAI-Agent" and \ + self.comments[index].body.startswith("## PR Analysis"): + self.previous_review = self.comments[index] + break + if self.previous_review: + last_review_time = self.previous_review.created_at + first_new_commit_index = 0 + self.last_seen_commit_sha = None + for index in range(len(self.commits) - 1, -1, -1): + if self.commits[index].commit.author.date > last_review_time: + self.first_new_commit_sha = self.commits[index].sha + first_new_commit_index = index + else: + self.last_seen_commit_sha = self.commits[index].sha + break + + self.commits = self.commits[first_new_commit_index:] + self.file_set = dict() + for commit in self.commits: + self.file_set.update({file.filename: file for file in commit.files}) def get_files(self): + if self.incremental and self.file_set: + return self.file_set.values() return self.pr.get_files() def get_diff_files(self) -> list[FilePatchInfo]: - files = 
self.pr.get_files() + files = self.get_files() diff_files = [] for file in files: if is_valid_file(file.filename): - original_file_content_str = self._get_pr_file_content(file, self.pr.base.sha) new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha) - diff_files.append(FilePatchInfo(original_file_content_str, new_file_content_str, file.patch, file.filename)) + patch = file.patch + if self.incremental and self.file_set: + original_file_content_str = self._get_pr_file_content(file, self.last_seen_commit_sha) + patch = load_large_diff(file, + new_file_content_str, + original_file_content_str, + None) + self.file_set[file.filename] = patch + else: + original_file_content_str = self._get_pr_file_content(file, self.pr.base.sha) + + diff_files.append( + FilePatchInfo(original_file_content_str, new_file_content_str, patch, file.filename)) self.diff_files = diff_files return diff_files @@ -90,7 +134,7 @@ class GithubProvider(GitProvider): logging.exception(f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}") return False - if relevant_lines_end<relevant_lines_start: + if relevant_lines_end < relevant_lines_start: if settings.config.verbosity_level >= 2: logging.exception(f"Failed to publish code suggestion, " f"relevant_lines_end is {relevant_lines_end} and " diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index 1a92ac7c..17927dd1 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -13,7 +13,7 @@ from ..algo.language_handler import is_valid_file class GitLabProvider(GitProvider): - def __init__(self, merge_request_url: Optional[str] = None): + def __init__(self, merge_request_url: Optional[str] = None, incremental: Optional[bool] = False): gitlab_url = settings.get("GITLAB.URL", None) if not gitlab_url: raise ValueError("GitLab URL is not set in the config file") @@ -32,6 +32,7 @@ class GitLabProvider(GitProvider): self._set_merge_request(merge_request_url) self.RE_HUNK_HEADER = re.compile( r"^@@ -(\d+)(?:,(\d+))? 
\+(\d+)(?:,(\d+))? @@[ ]?(.*)") + self.incremental = incremental def is_supported(self, capability: str) -> bool: if capability == 'get_issue_comments': diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index 62264ec1..38f918b9 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -15,9 +15,9 @@ from pr_agent.servers.help import actions_help_text, bot_help_text class PRReviewer: - def __init__(self, pr_url: str, cli_mode=False, is_answer: bool = False): + def __init__(self, pr_url: str, cli_mode=False, is_answer: bool = False, is_incremental: bool = False): - self.git_provider = get_git_provider()(pr_url) + self.git_provider = get_git_provider()(pr_url, incremental=is_incremental) self.main_language = get_main_pr_language( self.git_provider.get_languages(), self.git_provider.get_files() )