From 65bb70a1dd8bd3aa8f2f7c4eb762b6c46f731cd8 Mon Sep 17 00:00:00 2001
From: zmeir
Date: Wed, 25 Oct 2023 11:15:23 +0300
Subject: [PATCH 1/5] Added support for automatic review on push event

The new feature can be enabled via the new `github_app.handle_push_event`
configuration. To avoid unwanted side-effects, it currently defaults to
`false`.

The high-level flow (assuming the configuration is enabled):
1. receive a push event from GitHub
2. extract the branch and commits from the event
3. find the PR URL for the branch (PRs from forks are not supported yet)
4. run the configured commands (e.g. `/describe`, `/review -i`)

The push event flow is guarded by a backlog queue, so multiple push events
on the same branch don't trigger duplicate runs of the PR-Agent commands.
Example timeline:
1. push 1 - start handling the event
2. push 2 - waits while the push 1 event is still being handled
3. push 3 - dropped, since handling it would duplicate the handling of push 2
4. push 1 finishes being handled
5. push 2 wakes from its wait and continues (potentially reviewing the commits of both push 2 and push 3)
(A standalone sketch of this queueing pattern follows the last patch in this series.)

All of these behaviors are configurable and can be enabled or disabled as
needed.

Additional minor changes in this PR:
1. Created a `DefaultDictWithTimeout` utility class to reduce the boilerplate of managing caches for outdated triggers.
2. Guard against running an incremental review when there are no new commits.
3. Minor styling changes to the incremental review text.
---
 pr_agent/servers/github_app.py       | 145 ++++++++++++++++++++++-----
 pr_agent/servers/utils.py            |  59 +++++++++++
 pr_agent/settings/configuration.toml |  20 ++++
 pr_agent/tools/pr_reviewer.py        |   9 +-
 4 files changed, 209 insertions(+), 24 deletions(-)

diff --git a/pr_agent/servers/github_app.py b/pr_agent/servers/github_app.py
index 37f96e2d..7039e178 100644
--- a/pr_agent/servers/github_app.py
+++ b/pr_agent/servers/github_app.py
@@ -1,7 +1,7 @@
 import copy
 import os
-import time
-from typing import Any, Dict
+import asyncio.locks
+from typing import Any, Dict, List
 
 import uvicorn
 from fastapi import APIRouter, FastAPI, HTTPException, Request, Response
@@ -14,8 +14,9 @@ from pr_agent.algo.utils import update_settings_from_args
 from pr_agent.config_loader import get_settings, global_settings
 from pr_agent.git_providers import get_git_provider
 from pr_agent.git_providers.utils import apply_repo_settings
+from pr_agent.git_providers.git_provider import IncrementalPR
 from pr_agent.log import LoggingFormat, get_logger, setup_logger
-from pr_agent.servers.utils import verify_signature
+from pr_agent.servers.utils import verify_signature, DefaultDictWithTimeout
 
 setup_logger(fmt=LoggingFormat.JSON)
 
@@ -47,6 +48,7 @@ async def handle_marketplace_webhooks(request: Request, response: Response):
     body = await get_body(request)
     get_logger().info(f'Request body:\n{body}')
 
+
 async def get_body(request):
     try:
         body = await request.json()
@@ -61,7 +63,9 @@ async def get_body(request):
     return body
 
 
-_duplicate_requests_cache = {}
+_duplicate_requests_cache = DefaultDictWithTimeout(ttl=get_settings().github_app.duplicate_requests_cache_ttl)
+_duplicate_branch_push_triggers = DefaultDictWithTimeout(ttl=get_settings().github_app.push_event_pending_triggers_ttl)
+_pending_task_duplicate_push_conditions = DefaultDictWithTimeout(asyncio.locks.Condition, ttl=get_settings().github_app.push_event_pending_triggers_ttl)
 
 
 async def handle_request(body: Dict[str, Any], event: str):
@@ -73,7 +77,7 @@
async def handle_request(body: Dict[str, Any], event: str): event: The GitHub event type. """ action = body.get("action") - if not action: + if not (action or event == "push"): return {} agent = PRAgent() bot_user = get_settings().github_app.bot_user @@ -127,22 +131,122 @@ async def handle_request(body: Dict[str, Any], event: str): # avoid double reviews when opening a PR for the first time return {} get_logger().info(f"Performing review because of event={event} and action={action}") - apply_repo_settings(api_url) - for command in get_settings().github_app.pr_commands: - split_command = command.split(" ") - command = split_command[0] - args = split_command[1:] - other_args = update_settings_from_args(args) - new_command = ' '.join([command] + other_args) - get_logger().info(body) - get_logger().info(f"Performing command: {new_command}") - with get_logger().contextualize(**log_context): - await agent.handle_request(api_url, new_command) + await _perform_commands(get_settings().github_app.pr_commands, agent, body, api_url, log_context) + + # handle push event for new commits + elif event == "push" and get_settings().github_app.handle_push_event: + get_logger().debug(f"[PUSH] {body=}") + # get the branch name + ref = body.get("ref") + get_logger().debug(f"[PUSH] {ref=}") + if not (ref and ref.startswith("refs/heads/")): + return {} + branch = ref.removeprefix("refs/heads/") + get_logger().debug(f"[PUSH] {branch=}") + # skip first push (PR will follow) + if body.get("created"): + get_logger().debug("[PUSH] skipping first push") + return {} + # skip if no relevant commits (e.g. no commits at all, only bot/merge commits) + commits = body.get("commits", []) + get_logger().debug(f"[PUSH] {len(commits)} {commits=}") + bot_commits = [commit for commit in commits if commit.get("author", {}).get("username", "") == bot_user] + get_logger().debug(f"[PUSH] {len(bot_commits)} {bot_commits=}") + merge_commits = [commit for commit in commits if commit.get("message", "").startswith("Merge branch ")] + get_logger().debug(f"[PUSH] {len(merge_commits)} {merge_commits=}") + commit_ids_to_ignore = set() + if get_settings().github_app.push_event_ignore_bot_commits: + commit_ids_to_ignore.update({commit.get("id") for commit in bot_commits}) + if get_settings().github_app.push_event_ignore_bot_commits: + commit_ids_to_ignore.update({commit.get("id") for commit in merge_commits}) + commits = [commit for commit in commits if commit.get("id") not in commit_ids_to_ignore] + if not commits: + return {} + # TODO: consider adding some custom prompt to instruct the PR-Agent how to address bot commits + + # Prevent triggering multiple times for subsequent push events when one is enough: + # The first event will trigger the processing, and if there's a second event in the meanwhile it will wait. + # Any more events will be discarded, because they will all trigger the exact same processing on the PR. + # We let the second event wait instead of discarding it because while the first event was being processed, + # more commits may have been pushed that led to the subsequent push events, + # so we keep just one waiting as a delegate to trigger the processing for the new commits when done waiting. 
+ current_active_tasks = _duplicate_branch_push_triggers.setdefault(branch, 0) + max_active_tasks = 2 if get_settings().github_app.push_event_pending_triggers_backlog else 1 + if current_active_tasks < max_active_tasks: + # first and second tasks can enter + get_logger().info( + f"Continue processing push event for {branch=} because there are {current_active_tasks} active tasks" + ) + _duplicate_branch_push_triggers[branch] += 1 + else: + get_logger().info( + f"Skipping push event for {branch=} because another event already triggered the same processing" + ) + return {} + async with _pending_task_duplicate_push_conditions[branch]: + if current_active_tasks == 1: + # second task waits + get_logger().info( + f"Waiting to process push event for {branch=} because the first task is still in progress" + ) + await _pending_task_duplicate_push_conditions[branch].wait() + get_logger().info(f"Finished waiting to process push event for {branch=} - continue with flow") + + try: + # get PR info for branch + provider = get_git_provider()() + provider.repo, _ = provider._parse_pr_url(body["repository"]["html_url"].rstrip("/") + "/pull/1") + get_logger().debug(f"[PUSH] {provider.repo=}") + github_repo = provider._get_repo() + default_branch = body["repository"]["default_branch"] + get_logger().debug(f"[PUSH] {default_branch=}") + org = body["repository"]["organization"] + get_logger().debug(f"[PUSH] {org=}") + pull_requests = list(github_repo.get_pulls(state="open", base=default_branch, head=":".join((org, branch)))) + get_logger().debug(f"[PUSH] {pull_requests=}") + if not pull_requests: + return {} + pull_request = pull_requests[0].raw_data + # check that the PR is valid to run the agent like in the pull_request event + if not pull_request: + return {} + api_url = pull_request.get("url") + get_logger().debug(f"[PUSH] {api_url=}") + if not api_url: + return {} + log_context["api_url"] = api_url + if pull_request.get("draft", True) or pull_request.get("state") != "open" or pull_request.get("user", {}).get("login", "") == bot_user: + return {} + if get_settings().github_app.push_event_wait_for_initial_review and not get_git_provider()(api_url, incremental=IncrementalPR(True)).previous_review: + get_logger().info(f"Skipping incremental review because there was no initial review for PR={api_url} yet") + return {} + get_logger().info(f"Performing incremental review because of event={event} and branch={branch} with PR={api_url}") + await _perform_commands(get_settings().github_app.push_commands, agent, body, api_url, log_context) + + finally: + # release the waiting task block + async with _pending_task_duplicate_push_conditions[branch]: + _pending_task_duplicate_push_conditions[branch].notify(1) + _duplicate_branch_push_triggers[branch] -= 1 get_logger().info("event or action does not require handling") return {} +async def _perform_commands(commands: List[str], agent: PRAgent, body: dict, api_url: str, log_context: dict): + apply_repo_settings(api_url) + for command in commands: + split_command = command.split(" ") + command = split_command[0] + args = split_command[1:] + other_args = update_settings_from_args(args) + new_command = ' '.join([command] + other_args) + get_logger().info(body) + get_logger().info(f"Performing command: {new_command}") + with get_logger().contextualize(**log_context): + await agent.handle_request(api_url, new_command) + + def _is_duplicate_request(body: Dict[str, Any]) -> bool: """ In some deployments its possible to get duplicate requests if the handling is long, @@ -150,13 
+254,8 @@ def _is_duplicate_request(body: Dict[str, Any]) -> bool: """ request_hash = hash(str(body)) get_logger().info(f"request_hash: {request_hash}") - request_time = time.monotonic() - ttl = get_settings().github_app.duplicate_requests_cache_ttl # in seconds - to_delete = [key for key, key_time in _duplicate_requests_cache.items() if request_time - key_time > ttl] - for key in to_delete: - del _duplicate_requests_cache[key] - is_duplicate = request_hash in _duplicate_requests_cache - _duplicate_requests_cache[request_hash] = request_time + is_duplicate = _duplicate_requests_cache.get(request_hash, False) + _duplicate_requests_cache[request_hash] = True if is_duplicate: get_logger().info(f"Ignoring duplicate request {request_hash}") return is_duplicate diff --git a/pr_agent/servers/utils.py b/pr_agent/servers/utils.py index c24b880c..12dd85ae 100644 --- a/pr_agent/servers/utils.py +++ b/pr_agent/servers/utils.py @@ -1,5 +1,8 @@ import hashlib import hmac +import time +from collections import defaultdict +from typing import Callable, Any from fastapi import HTTPException @@ -25,3 +28,59 @@ def verify_signature(payload_body, secret_token, signature_header): class RateLimitExceeded(Exception): """Raised when the git provider API rate limit has been exceeded.""" pass + + +class DefaultDictWithTimeout(defaultdict): + """A defaultdict with a time-to-live (TTL).""" + + def __init__( + self, + default_factory: Callable[[], Any] = None, + ttl: int = None, + refresh_interval: int = 60, + update_key_time_on_get: bool = True, + *args, + **kwargs, + ): + """ + Args: + default_factory: The default factory to use for keys that are not in the dictionary. + ttl: The time-to-live (TTL) in seconds. + refresh_interval: How often to refresh the dict and delete items older than the TTL. + update_key_time_on_get: Whether to update the access time of a key also on get (or only when set). 
+ """ + super().__init__(default_factory, *args, **kwargs) + self.__key_times = dict() + self.__ttl = ttl + self.__refresh_interval = refresh_interval + self.__update_key_time_on_get = update_key_time_on_get + self.__last_refresh = self.__time() - self.__refresh_interval + + @staticmethod + def __time(): + return time.monotonic() + + def __refresh(self): + if self.__ttl is None: + return + request_time = self.__time() + if request_time - self.__last_refresh > self.__refresh_interval: + return + to_delete = [key for key, key_time in self.__key_times.items() if request_time - key_time > self.__ttl] + for key in to_delete: + del self[key] + self.__last_refresh = request_time + + def __getitem__(self, __key): + if self.__update_key_time_on_get: + self.__key_times[__key] = self.__time() + self.__refresh() + return super().__getitem__(__key) + + def __setitem__(self, __key, __value): + self.__key_times[__key] = self.__time() + return super().__setitem__(__key, __value) + + def __delitem__(self, __key): + del self.__key_times[__key] + return super().__delitem__(__key) diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 6f44bc53..32fcfb83 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -83,6 +83,26 @@ pr_commands = [ "/describe --pr_description.add_original_user_description=true --pr_description.keep_original_user_title=true", "/auto_review", ] +# settings for "push" event +handle_push_event = false +push_event_ignore_bot_commits = true +push_event_ignore_merge_commits = true +push_event_wait_for_initial_review = true +push_event_pending_triggers_backlog = true +push_event_pending_triggers_ttl = 300 +push_commands = [ + "/describe --pr_description.add_original_user_description=true --pr_description.keep_original_user_title=true", + """/auto_review -i \ + --pr_reviewer.require_focused_review=false \ + --pr_reviewer.require_score_review=false \ + --pr_reviewer.require_tests_review=false \ + --pr_reviewer.require_security_review=false \ + --pr_reviewer.require_estimate_effort_to_review=false \ + --pr_reviewer.num_code_suggestions=0 \ + --pr_reviewer.inline_code_comments=false \ + --pr_reviewer.extra_instructions='' \ + """ +] [gitlab] # URL to the gitlab service diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index ed99ddf6..0cc695e3 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -98,6 +98,9 @@ class PRReviewer: if self.is_auto and not get_settings().pr_reviewer.automatic_review: get_logger().info(f'Automatic review is disabled {self.pr_url}') return None + if self.is_auto and self.incremental.is_incremental and not self.incremental.first_new_commit_sha: + get_logger().info(f"Incremental review is enabled for {self.pr_url} but there are no new commits") + return None get_logger().info(f'Reviewing PR: {self.pr_url} ...') @@ -228,9 +231,13 @@ class PRReviewer: if self.incremental.is_incremental: last_commit_url = f"{self.git_provider.get_pr_url()}/commits/" \ f"{self.git_provider.incremental.first_new_commit_sha}" + last_commit_msg = self.incremental.commits_range[0].commit.message if self.incremental.commits_range else "" + incremental_review_markdown_text = f"Starting from commit {last_commit_url}" + if last_commit_msg: + incremental_review_markdown_text += f" \n_({last_commit_msg.splitlines(keepends=False)[0]})_" data = OrderedDict(data) data.update({'Incremental PR Review': { - "⏮️ Review for commits since previous PR-Agent review": f"Starting from 
commit {last_commit_url}"}}) + "⏮️ Review for commits since previous PR-Agent review": incremental_review_markdown_text}}) data.move_to_end('Incremental PR Review', last=False) markdown_text = convert_to_markdown(data, self.git_provider.is_supported("gfm_markdown")) From 02570ea797623fc3edecad5fc40d4b829162f7e2 Mon Sep 17 00:00:00 2001 From: zmeir Date: Thu, 26 Oct 2023 11:24:24 +0300 Subject: [PATCH 2/5] Remove previous review comment on push event --- pr_agent/git_providers/bitbucket_provider.py | 7 ++++- pr_agent/git_providers/codecommit_provider.py | 3 ++ pr_agent/git_providers/gerrit_provider.py | 3 ++ pr_agent/git_providers/git_provider.py | 4 +++ pr_agent/git_providers/github_provider.py | 28 +++++++++++++------ pr_agent/git_providers/gitlab_provider.py | 8 +++++- pr_agent/git_providers/local_git_provider.py | 3 ++ pr_agent/settings/configuration.toml | 2 ++ pr_agent/tools/pr_reviewer.py | 26 ++++++++++++++++- 9 files changed, 73 insertions(+), 11 deletions(-) diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py index fef51794..fe416d5c 100644 --- a/pr_agent/git_providers/bitbucket_provider.py +++ b/pr_agent/git_providers/bitbucket_provider.py @@ -142,10 +142,15 @@ class BitbucketProvider(GitProvider): def remove_initial_comment(self): try: for comment in self.temp_comments: - self.pr.delete(f"comments/{comment}") + self.remove_comment(comment) except Exception as e: get_logger().exception(f"Failed to remove temp comments, error: {e}") + def remove_comment(self, comment): + try: + self.pr.delete(f"comments/{comment}") + except Exception as e: + get_logger().exception(f"Failed to remove comment, error: {e}") # funtion to create_inline_comment def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str): diff --git a/pr_agent/git_providers/codecommit_provider.py b/pr_agent/git_providers/codecommit_provider.py index 4e12f96e..a4836849 100644 --- a/pr_agent/git_providers/codecommit_provider.py +++ b/pr_agent/git_providers/codecommit_provider.py @@ -221,6 +221,9 @@ class CodeCommitProvider(GitProvider): def remove_initial_comment(self): return "" # not implemented yet + def remove_comment(self, comment): + return "" # not implemented yet + def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str): # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/post_comment_for_compared_commit.html raise NotImplementedError("CodeCommit provider does not support publishing inline comments yet") diff --git a/pr_agent/git_providers/gerrit_provider.py b/pr_agent/git_providers/gerrit_provider.py index ae017fbd..fd67f5f0 100644 --- a/pr_agent/git_providers/gerrit_provider.py +++ b/pr_agent/git_providers/gerrit_provider.py @@ -396,5 +396,8 @@ class GerritProvider(GitProvider): # shutil.rmtree(self.repo_path) pass + def remove_comment(self, comment): + pass + def get_pr_branch(self): return self.repo.head diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py index c6740467..6f986cf0 100644 --- a/pr_agent/git_providers/git_provider.py +++ b/pr_agent/git_providers/git_provider.py @@ -71,6 +71,10 @@ class GitProvider(ABC): def remove_initial_comment(self): pass + @abstractmethod + def remove_comment(self, comment): + pass + @abstractmethod def get_languages(self): pass diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 7a47fbf5..88fb3863 100644 --- 
a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -50,7 +50,7 @@ class GithubProvider(GitProvider): def get_incremental_commits(self): self.commits = list(self.pr.get_commits()) - self.get_previous_review() + self.previous_review = self.get_previous_review(full=True, incremental=True) if self.previous_review: self.incremental.commits_range = self.get_commit_range() # Get all files changed during the commit range @@ -73,13 +73,19 @@ class GithubProvider(GitProvider): break return self.commits[first_new_commit_index:] - def get_previous_review(self): - self.previous_review = None - self.comments = list(self.pr.get_issue_comments()) + def get_previous_review(self, *, full: bool, incremental: bool): + if not (full or incremental): + raise ValueError("At least one of full or incremental must be True") + if not getattr(self, "comments", None): + self.comments = list(self.pr.get_issue_comments()) + prefixes = [] + if full: + prefixes.append("## PR Analysis") + if incremental: + prefixes.append("## Incremental PR Review") for index in range(len(self.comments) - 1, -1, -1): - if self.comments[index].body.startswith("## PR Analysis") or self.comments[index].body.startswith("## Incremental PR Review"): - self.previous_review = self.comments[index] - break + if any(self.comments[index].body.startswith(prefix) for prefix in prefixes): + return self.comments[index] def get_files(self): if self.incremental.is_incremental and self.file_set: @@ -218,10 +224,16 @@ class GithubProvider(GitProvider): try: for comment in getattr(self.pr, 'comments_list', []): if comment.is_temporary: - comment.delete() + self.remove_comment(comment) except Exception as e: get_logger().exception(f"Failed to remove initial comment, error: {e}") + def remove_comment(self, comment): + try: + comment.delete() + except Exception as e: + get_logger().exception(f"Failed to remove comment, error: {e}") + def get_title(self): return self.pr.title diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index 0d09e622..d36fecf9 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -287,10 +287,16 @@ class GitLabProvider(GitProvider): def remove_initial_comment(self): try: for comment in self.temp_comments: - comment.delete() + self.remove_comment(comment) except Exception as e: get_logger().exception(f"Failed to remove temp comments, error: {e}") + def remove_comment(self, comment): + try: + comment.delete() + except Exception as e: + get_logger().exception(f"Failed to remove comment, error: {e}") + def get_title(self): return self.mr.title diff --git a/pr_agent/git_providers/local_git_provider.py b/pr_agent/git_providers/local_git_provider.py index 5fa9f0be..0ef11413 100644 --- a/pr_agent/git_providers/local_git_provider.py +++ b/pr_agent/git_providers/local_git_provider.py @@ -140,6 +140,9 @@ class LocalGitProvider(GitProvider): def remove_initial_comment(self): pass # Not applicable to the local git provider, but required by the interface + def remove_comment(self, comment): + pass # Not applicable to the local git provider, but required by the interface + def get_languages(self): """ Calculate percentage of languages in repository. Used for hunk prioritisation. 
diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 32fcfb83..3fdcf93c 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -24,6 +24,7 @@ num_code_suggestions=4 inline_code_comments = false ask_and_reflect=false automatic_review=true +remove_previous_review_comment=false extra_instructions = "" [pr_description] # /describe # @@ -100,6 +101,7 @@ push_commands = [ --pr_reviewer.require_estimate_effort_to_review=false \ --pr_reviewer.num_code_suggestions=0 \ --pr_reviewer.inline_code_comments=false \ + --pr_reviewer.remove_previous_review_comment=true \ --pr_reviewer.extra_instructions='' \ """ ] diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index 0cc695e3..b13b5e17 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -114,9 +114,10 @@ class PRReviewer: if get_settings().config.publish_output: get_logger().info('Pushing PR review...') + previous_review_comment = self._get_previous_review_comment() self.git_provider.publish_comment(pr_comment) self.git_provider.remove_initial_comment() - + self._remove_previous_review_comment(previous_review_comment) if get_settings().pr_reviewer.inline_code_comments: get_logger().info('Pushing inline code comments...') self._publish_inline_code_comments() @@ -318,3 +319,26 @@ class PRReviewer: break return question_str, answer_str + + def _get_previous_review_comment(self): + """ + Get the previous review comment if it exists. + """ + try: + if get_settings().pr_reviewer.remove_previous_review_comment and hasattr(self.git_provider, "get_previous_review"): + return self.git_provider.get_previous_review( + full=not self.incremental.is_incremental, + incremental=self.incremental.is_incremental, + ) + except Exception as e: + get_logger().exception(f"Failed to get previous review comment, error: {e}") + + def _remove_previous_review_comment(self, comment): + """ + Get the previous review comment if it exists. 
+ """ + try: + if get_settings().pr_reviewer.remove_previous_review_comment and comment: + self.git_provider.remove_comment(comment) + except Exception as e: + get_logger().exception(f"Failed to remove previous review comment, error: {e}") From 6541575a0ea499c4e5771f9e5457902486bddb8a Mon Sep 17 00:00:00 2001 From: zmeir Date: Thu, 26 Oct 2023 14:45:57 +0300 Subject: [PATCH 3/5] Refactor to use pull_request synchronize event --- pr_agent/servers/github_app.py | 148 +++++++++++---------------- pr_agent/settings/configuration.toml | 14 +-- 2 files changed, 66 insertions(+), 96 deletions(-) diff --git a/pr_agent/servers/github_app.py b/pr_agent/servers/github_app.py index 7039e178..2bb85a3d 100644 --- a/pr_agent/servers/github_app.py +++ b/pr_agent/servers/github_app.py @@ -1,7 +1,7 @@ import copy import os import asyncio.locks -from typing import Any, Dict, List +from typing import Any, Dict, List, Tuple import uvicorn from fastapi import APIRouter, FastAPI, HTTPException, Request, Response @@ -64,8 +64,8 @@ async def get_body(request): _duplicate_requests_cache = DefaultDictWithTimeout(ttl=get_settings().github_app.duplicate_requests_cache_ttl) -_duplicate_branch_push_triggers = DefaultDictWithTimeout(ttl=get_settings().github_app.push_event_pending_triggers_ttl) -_pending_task_duplicate_push_conditions = DefaultDictWithTimeout(asyncio.locks.Condition, ttl=get_settings().github_app.push_event_pending_triggers_ttl) +_duplicate_push_triggers = DefaultDictWithTimeout(ttl=get_settings().github_app.push_trigger_pending_tasks_ttl) +_pending_task_duplicate_push_conditions = DefaultDictWithTimeout(asyncio.locks.Condition, ttl=get_settings().github_app.push_trigger_pending_tasks_ttl) async def handle_request(body: Dict[str, Any], event: str): @@ -77,7 +77,7 @@ async def handle_request(body: Dict[str, Any], event: str): event: The GitHub event type. 
""" action = body.get("action") - if not (action or event == "push"): + if not action: return {} agent = PRAgent() bot_user = get_settings().github_app.bot_user @@ -113,126 +113,96 @@ async def handle_request(body: Dict[str, Any], event: str): # handle pull_request event: # automatically review opened/reopened/ready_for_review PRs as long as they're not in draft, # as well as direct review requests from the bot - elif event == 'pull_request': - pull_request = body.get("pull_request") - if not pull_request: - return {} - api_url = pull_request.get("url") - if not api_url: - return {} - log_context["api_url"] = api_url - if pull_request.get("draft", True) or pull_request.get("state") != "open" or pull_request.get("user", {}).get("login", "") == bot_user: + elif event == 'pull_request' and action != 'synchronize': + pull_request, api_url = _check_pull_request_event(action, body, log_context, bot_user) + if not (pull_request and api_url): return {} if action in get_settings().github_app.handle_pr_actions: if action == "review_requested": if body.get("requested_reviewer", {}).get("login", "") != bot_user: return {} - if pull_request.get("created_at") == pull_request.get("updated_at"): - # avoid double reviews when opening a PR for the first time - return {} - get_logger().info(f"Performing review because of event={event} and action={action}") + get_logger().info(f"Performing review for {api_url=} because of {event=} and {action=}") await _perform_commands(get_settings().github_app.pr_commands, agent, body, api_url, log_context) - # handle push event for new commits - elif event == "push" and get_settings().github_app.handle_push_event: - get_logger().debug(f"[PUSH] {body=}") - # get the branch name - ref = body.get("ref") - get_logger().debug(f"[PUSH] {ref=}") - if not (ref and ref.startswith("refs/heads/")): + # handle pull_request event with synchronize action - "push trigger" for new commits + elif event == 'pull_request' and action == 'synchronize' and get_settings().github_app.handle_push_trigger: + pull_request, api_url = _check_pull_request_event(action, body, log_context, bot_user) + if not (pull_request and api_url): return {} - branch = ref.removeprefix("refs/heads/") - get_logger().debug(f"[PUSH] {branch=}") - # skip first push (PR will follow) - if body.get("created"): - get_logger().debug("[PUSH] skipping first push") - return {} - # skip if no relevant commits (e.g. 
no commits at all, only bot/merge commits) - commits = body.get("commits", []) - get_logger().debug(f"[PUSH] {len(commits)} {commits=}") - bot_commits = [commit for commit in commits if commit.get("author", {}).get("username", "") == bot_user] - get_logger().debug(f"[PUSH] {len(bot_commits)} {bot_commits=}") - merge_commits = [commit for commit in commits if commit.get("message", "").startswith("Merge branch ")] - get_logger().debug(f"[PUSH] {len(merge_commits)} {merge_commits=}") - commit_ids_to_ignore = set() - if get_settings().github_app.push_event_ignore_bot_commits: - commit_ids_to_ignore.update({commit.get("id") for commit in bot_commits}) - if get_settings().github_app.push_event_ignore_bot_commits: - commit_ids_to_ignore.update({commit.get("id") for commit in merge_commits}) - commits = [commit for commit in commits if commit.get("id") not in commit_ids_to_ignore] - if not commits: - return {} - # TODO: consider adding some custom prompt to instruct the PR-Agent how to address bot commits - # Prevent triggering multiple times for subsequent push events when one is enough: - # The first event will trigger the processing, and if there's a second event in the meanwhile it will wait. + # TODO: do we still want to get the list of commits to filter bot/merge commits? + before_sha = body.get("before") + after_sha = body.get("after") + merge_commit_sha = pull_request.get("merge_commit_sha") + if before_sha == after_sha: + return {} + if get_settings().github_app.push_trigger_ignore_merge_commits and after_sha == merge_commit_sha: + return {} + if get_settings().github_app.push_trigger_ignore_bot_commits and body.get("sender", {}).get("login", "") == bot_user: + return {} + + # Prevent triggering multiple times for subsequent push triggers when one is enough: + # The first push will trigger the processing, and if there's a second push in the meanwhile it will wait. # Any more events will be discarded, because they will all trigger the exact same processing on the PR. # We let the second event wait instead of discarding it because while the first event was being processed, - # more commits may have been pushed that led to the subsequent push events, + # more commits may have been pushed that led to the subsequent events, # so we keep just one waiting as a delegate to trigger the processing for the new commits when done waiting. 
- current_active_tasks = _duplicate_branch_push_triggers.setdefault(branch, 0) - max_active_tasks = 2 if get_settings().github_app.push_event_pending_triggers_backlog else 1 + current_active_tasks = _duplicate_push_triggers.setdefault(api_url, 0) + max_active_tasks = 2 if get_settings().github_app.push_trigger_pending_tasks_backlog else 1 if current_active_tasks < max_active_tasks: - # first and second tasks can enter + # first task can enter, and second tasks too if backlog is enabled get_logger().info( - f"Continue processing push event for {branch=} because there are {current_active_tasks} active tasks" + f"Continue processing push trigger for {api_url=} because there are {current_active_tasks} active tasks" ) - _duplicate_branch_push_triggers[branch] += 1 + _duplicate_push_triggers[api_url] += 1 else: get_logger().info( - f"Skipping push event for {branch=} because another event already triggered the same processing" + f"Skipping push trigger for {api_url=} because another event already triggered the same processing" ) return {} - async with _pending_task_duplicate_push_conditions[branch]: + async with _pending_task_duplicate_push_conditions[api_url]: if current_active_tasks == 1: # second task waits get_logger().info( - f"Waiting to process push event for {branch=} because the first task is still in progress" + f"Waiting to process push trigger for {api_url=} because the first task is still in progress" ) - await _pending_task_duplicate_push_conditions[branch].wait() - get_logger().info(f"Finished waiting to process push event for {branch=} - continue with flow") + await _pending_task_duplicate_push_conditions[api_url].wait() + get_logger().info(f"Finished waiting to process push trigger for {api_url=} - continue with flow") try: - # get PR info for branch - provider = get_git_provider()() - provider.repo, _ = provider._parse_pr_url(body["repository"]["html_url"].rstrip("/") + "/pull/1") - get_logger().debug(f"[PUSH] {provider.repo=}") - github_repo = provider._get_repo() - default_branch = body["repository"]["default_branch"] - get_logger().debug(f"[PUSH] {default_branch=}") - org = body["repository"]["organization"] - get_logger().debug(f"[PUSH] {org=}") - pull_requests = list(github_repo.get_pulls(state="open", base=default_branch, head=":".join((org, branch)))) - get_logger().debug(f"[PUSH] {pull_requests=}") - if not pull_requests: + if get_settings().github_app.push_trigger_wait_for_initial_review and not get_git_provider()(api_url, incremental=IncrementalPR(True)).previous_review: + get_logger().info(f"Skipping incremental review because there was no initial review for {api_url=} yet") return {} - pull_request = pull_requests[0].raw_data - # check that the PR is valid to run the agent like in the pull_request event - if not pull_request: - return {} - api_url = pull_request.get("url") - get_logger().debug(f"[PUSH] {api_url=}") - if not api_url: - return {} - log_context["api_url"] = api_url - if pull_request.get("draft", True) or pull_request.get("state") != "open" or pull_request.get("user", {}).get("login", "") == bot_user: - return {} - if get_settings().github_app.push_event_wait_for_initial_review and not get_git_provider()(api_url, incremental=IncrementalPR(True)).previous_review: - get_logger().info(f"Skipping incremental review because there was no initial review for PR={api_url} yet") - return {} - get_logger().info(f"Performing incremental review because of event={event} and branch={branch} with PR={api_url}") + get_logger().info(f"Performing incremental review for 
{api_url=} because of {event=} and {action=}") await _perform_commands(get_settings().github_app.push_commands, agent, body, api_url, log_context) finally: # release the waiting task block - async with _pending_task_duplicate_push_conditions[branch]: - _pending_task_duplicate_push_conditions[branch].notify(1) - _duplicate_branch_push_triggers[branch] -= 1 + async with _pending_task_duplicate_push_conditions[api_url]: + _pending_task_duplicate_push_conditions[api_url].notify(1) + _duplicate_push_triggers[api_url] -= 1 get_logger().info("event or action does not require handling") return {} +def _check_pull_request_event(action: str, body: dict, log_context: dict, bot_user: str) -> Tuple[Dict[str, Any], str]: + invalid_result = {}, "" + pull_request = body.get("pull_request") + if not pull_request: + return invalid_result + api_url = pull_request.get("url") + if not api_url: + return invalid_result + log_context["api_url"] = api_url + if pull_request.get("draft", True) or pull_request.get("state") != "open" or pull_request.get("user", {}).get("login", "") == bot_user: + return invalid_result + if action in ("review_requested", "synchronize") and pull_request.get("created_at") == pull_request.get("updated_at"): + # avoid double reviews when opening a PR for the first time + return invalid_result + return pull_request, api_url + + async def _perform_commands(commands: List[str], agent: PRAgent, body: dict, api_url: str, log_context: dict): apply_repo_settings(api_url) for command in commands: diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 3fdcf93c..525cce7d 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -84,13 +84,13 @@ pr_commands = [ "/describe --pr_description.add_original_user_description=true --pr_description.keep_original_user_title=true", "/auto_review", ] -# settings for "push" event -handle_push_event = false -push_event_ignore_bot_commits = true -push_event_ignore_merge_commits = true -push_event_wait_for_initial_review = true -push_event_pending_triggers_backlog = true -push_event_pending_triggers_ttl = 300 +# settings for "pull_request" event with "synchronize" action - used to detect and handle push triggers for new commits +handle_push_trigger = false +push_trigger_ignore_bot_commits = true +push_trigger_ignore_merge_commits = true +push_trigger_wait_for_initial_review = true +push_trigger_pending_tasks_backlog = true +push_trigger_pending_tasks_ttl = 300 push_commands = [ "/describe --pr_description.add_original_user_description=true --pr_description.keep_original_user_title=true", """/auto_review -i \ From 414f2b6767e568977f0b622d843b4269721f1857 Mon Sep 17 00:00:00 2001 From: zmeir Date: Thu, 26 Oct 2023 16:46:01 +0300 Subject: [PATCH 4/5] Fix incremental review if there are no new commits (would have performed a full review instead) --- pr_agent/git_providers/github_provider.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 88fb3863..4fce1133 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -63,7 +63,7 @@ class GithubProvider(GitProvider): def get_commit_range(self): last_review_time = self.previous_review.created_at - first_new_commit_index = 0 + first_new_commit_index = None for index in range(len(self.commits) - 1, -1, -1): if self.commits[index].commit.author.date > last_review_time: 
self.incremental.first_new_commit_sha = self.commits[index].sha @@ -71,7 +71,7 @@ class GithubProvider(GitProvider): else: self.incremental.last_seen_commit_sha = self.commits[index].sha break - return self.commits[first_new_commit_index:] + return self.commits[first_new_commit_index:] if first_new_commit_index is not None else [] def get_previous_review(self, *, full: bool, incremental: bool): if not (full or incremental): From e6bea76eee744c74996d827717336eaae1e83480 Mon Sep 17 00:00:00 2001 From: Zohar Meir <33152084+zmeir@users.noreply.github.com> Date: Thu, 26 Oct 2023 17:07:16 +0300 Subject: [PATCH 5/5] Typo --- pr_agent/tools/pr_reviewer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index b13b5e17..c8081fde 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -335,7 +335,7 @@ class PRReviewer: def _remove_previous_review_comment(self, comment): """ - Get the previous review comment if it exists. + Remove the previous review comment if it exists. """ try: if get_settings().pr_reviewer.remove_previous_review_comment and comment:
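
As described in the first commit message, push handling is throttled per branch (per PR URL after the refactor in PATCH 3): at most one task runs, at most one more waits as a delegate for any later pushes, and everything else is dropped. The sketch below is a minimal, standalone illustration of that pattern, assuming an asyncio-based handler; the names `handle_push`, `process`, `MAX_ACTIVE`, `_active` and `_conditions` are illustrative stand-ins for `_duplicate_push_triggers` and `_pending_task_duplicate_push_conditions` from the patches.

```python
import asyncio
from collections import defaultdict

# Illustrative stand-ins for the per-key bookkeeping used in the patches.
_active = defaultdict(int)                    # how many tasks are running/waiting per key
_conditions = defaultdict(asyncio.Condition)  # used to park the single waiting delegate

MAX_ACTIVE = 2  # one running task plus one waiting delegate (backlog enabled)


async def handle_push(key, process):
    current = _active[key]
    if current >= MAX_ACTIVE:
        # A run is in progress and a delegate is already waiting:
        # this event would repeat the same work, so drop it.
        return
    _active[key] += 1
    async with _conditions[key]:
        if current == 1:
            # Another task is processing this key; wait for it to finish,
            # then run once to cover all commits pushed in the meantime.
            await _conditions[key].wait()
    try:
        await process(key)
    finally:
        async with _conditions[key]:
            _conditions[key].notify(1)
            _active[key] -= 1
```

Keeping exactly one waiter rather than none is deliberate: commits pushed while the first run is in progress would otherwise never be reviewed, and the single delegate picks them up in one batch once it is woken.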
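
The `DefaultDictWithTimeout` introduced in `pr_agent/servers/utils.py` backs both the duplicate-request cache and the pending-push counters. The condensed sketch below shows the time-to-live bookkeeping in isolation, under the assumption that expired keys are meant to be purged lazily, at most once every `refresh_interval` seconds; `TTLDict` is an illustrative name, and this is not a copy of the class from the patch.

```python
import time
from collections import defaultdict


class TTLDict(defaultdict):
    """Sketch: a defaultdict whose entries expire `ttl` seconds after last access."""

    def __init__(self, default_factory=None, ttl=None, refresh_interval=60):
        super().__init__(default_factory)
        self._ttl = ttl
        self._refresh_interval = refresh_interval
        self._key_times = {}                # last access time per key
        self._last_purge = time.monotonic()

    def _purge_expired(self):
        if self._ttl is None:
            return
        now = time.monotonic()
        if now - self._last_purge < self._refresh_interval:
            return  # purged recently enough; skip the scan this time
        for key in [k for k, t in self._key_times.items() if now - t > self._ttl]:
            self._key_times.pop(key, None)
            self.pop(key, None)             # tolerate keys that were never stored
        self._last_purge = now

    def __getitem__(self, key):
        self._purge_expired()
        self._key_times[key] = time.monotonic()
        return super().__getitem__(key)

    def __setitem__(self, key, value):
        self._key_times[key] = time.monotonic()
        super().__setitem__(key, value)
```

With entries expiring on their own, `_is_duplicate_request` no longer needs its manual timestamp bookkeeping: `cache.get(request_hash, False)` answers the question and `cache[request_hash] = True` refreshes the entry, which is the simplification made in the first patch.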