From 65bb70a1dd8bd3aa8f2f7c4eb762b6c46f731cd8 Mon Sep 17 00:00:00 2001 From: zmeir Date: Wed, 25 Oct 2023 11:15:23 +0300 Subject: [PATCH 1/8] Added support for automatic review on push event The new feature can be enabled via the new configuration `github_app.handle_push_event`. To avoid any unwanted side-effects, the current default of this configuration is set to `false`. The high level flow (assuming the configuration is enabled): 1. receive push event from GitHub 2. extract branch and commits from event 3. find PR url for branch (currently does not support PRs from forks) 4. perform configured commands (e.g. `/describe`, `/review -i`) The push event flow is guarded by a backlog queue so that multiple push events on the same branch won't trigger multiple duplicate runs of the PR-Agent commands. Example timeline: 1. push 1 - start handling event 2. push 2 - waiting to be handled while push 1 event is still running 3. push 3 - event is dropped since handling it and handling push 2 is the same, so it is redundant 4. push 1 finished being handled 5. push 2 awakens from wait and continues handling (potentially reviewing the commits of both push 2 and push 3) All of these options are configurable and can be enabled/disabled as per the user's desire. Additional minor changes in this PR: 1. Created `DefaultDictWithTimeout` utility class to avoid too much boilerplate code in managing caches for outdated triggers. 2. Guard against running increment review when there are no new commits. 3. Minor styling changes for incremented review text. --- pr_agent/servers/github_app.py | 145 ++++++++++++++++++++++----- pr_agent/servers/utils.py | 59 +++++++++++ pr_agent/settings/configuration.toml | 20 ++++ pr_agent/tools/pr_reviewer.py | 9 +- 4 files changed, 209 insertions(+), 24 deletions(-) diff --git a/pr_agent/servers/github_app.py b/pr_agent/servers/github_app.py index 37f96e2d..7039e178 100644 --- a/pr_agent/servers/github_app.py +++ b/pr_agent/servers/github_app.py @@ -1,7 +1,7 @@ import copy import os -import time -from typing import Any, Dict +import asyncio.locks +from typing import Any, Dict, List import uvicorn from fastapi import APIRouter, FastAPI, HTTPException, Request, Response @@ -14,8 +14,9 @@ from pr_agent.algo.utils import update_settings_from_args from pr_agent.config_loader import get_settings, global_settings from pr_agent.git_providers import get_git_provider from pr_agent.git_providers.utils import apply_repo_settings +from pr_agent.git_providers.git_provider import IncrementalPR from pr_agent.log import LoggingFormat, get_logger, setup_logger -from pr_agent.servers.utils import verify_signature +from pr_agent.servers.utils import verify_signature, DefaultDictWithTimeout setup_logger(fmt=LoggingFormat.JSON) @@ -47,6 +48,7 @@ async def handle_marketplace_webhooks(request: Request, response: Response): body = await get_body(request) get_logger().info(f'Request body:\n{body}') + async def get_body(request): try: body = await request.json() @@ -61,7 +63,9 @@ async def get_body(request): return body -_duplicate_requests_cache = {} +_duplicate_requests_cache = DefaultDictWithTimeout(ttl=get_settings().github_app.duplicate_requests_cache_ttl) +_duplicate_branch_push_triggers = DefaultDictWithTimeout(ttl=get_settings().github_app.push_event_pending_triggers_ttl) +_pending_task_duplicate_push_conditions = DefaultDictWithTimeout(asyncio.locks.Condition, ttl=get_settings().github_app.push_event_pending_triggers_ttl) async def handle_request(body: Dict[str, Any], event: str): @@ -73,7 +77,7 @@ async def handle_request(body: Dict[str, Any], event: str): event: The GitHub event type. """ action = body.get("action") - if not action: + if not (action or event == "push"): return {} agent = PRAgent() bot_user = get_settings().github_app.bot_user @@ -127,22 +131,122 @@ async def handle_request(body: Dict[str, Any], event: str): # avoid double reviews when opening a PR for the first time return {} get_logger().info(f"Performing review because of event={event} and action={action}") - apply_repo_settings(api_url) - for command in get_settings().github_app.pr_commands: - split_command = command.split(" ") - command = split_command[0] - args = split_command[1:] - other_args = update_settings_from_args(args) - new_command = ' '.join([command] + other_args) - get_logger().info(body) - get_logger().info(f"Performing command: {new_command}") - with get_logger().contextualize(**log_context): - await agent.handle_request(api_url, new_command) + await _perform_commands(get_settings().github_app.pr_commands, agent, body, api_url, log_context) + + # handle push event for new commits + elif event == "push" and get_settings().github_app.handle_push_event: + get_logger().debug(f"[PUSH] {body=}") + # get the branch name + ref = body.get("ref") + get_logger().debug(f"[PUSH] {ref=}") + if not (ref and ref.startswith("refs/heads/")): + return {} + branch = ref.removeprefix("refs/heads/") + get_logger().debug(f"[PUSH] {branch=}") + # skip first push (PR will follow) + if body.get("created"): + get_logger().debug("[PUSH] skipping first push") + return {} + # skip if no relevant commits (e.g. no commits at all, only bot/merge commits) + commits = body.get("commits", []) + get_logger().debug(f"[PUSH] {len(commits)} {commits=}") + bot_commits = [commit for commit in commits if commit.get("author", {}).get("username", "") == bot_user] + get_logger().debug(f"[PUSH] {len(bot_commits)} {bot_commits=}") + merge_commits = [commit for commit in commits if commit.get("message", "").startswith("Merge branch ")] + get_logger().debug(f"[PUSH] {len(merge_commits)} {merge_commits=}") + commit_ids_to_ignore = set() + if get_settings().github_app.push_event_ignore_bot_commits: + commit_ids_to_ignore.update({commit.get("id") for commit in bot_commits}) + if get_settings().github_app.push_event_ignore_bot_commits: + commit_ids_to_ignore.update({commit.get("id") for commit in merge_commits}) + commits = [commit for commit in commits if commit.get("id") not in commit_ids_to_ignore] + if not commits: + return {} + # TODO: consider adding some custom prompt to instruct the PR-Agent how to address bot commits + + # Prevent triggering multiple times for subsequent push events when one is enough: + # The first event will trigger the processing, and if there's a second event in the meanwhile it will wait. + # Any more events will be discarded, because they will all trigger the exact same processing on the PR. + # We let the second event wait instead of discarding it because while the first event was being processed, + # more commits may have been pushed that led to the subsequent push events, + # so we keep just one waiting as a delegate to trigger the processing for the new commits when done waiting. + current_active_tasks = _duplicate_branch_push_triggers.setdefault(branch, 0) + max_active_tasks = 2 if get_settings().github_app.push_event_pending_triggers_backlog else 1 + if current_active_tasks < max_active_tasks: + # first and second tasks can enter + get_logger().info( + f"Continue processing push event for {branch=} because there are {current_active_tasks} active tasks" + ) + _duplicate_branch_push_triggers[branch] += 1 + else: + get_logger().info( + f"Skipping push event for {branch=} because another event already triggered the same processing" + ) + return {} + async with _pending_task_duplicate_push_conditions[branch]: + if current_active_tasks == 1: + # second task waits + get_logger().info( + f"Waiting to process push event for {branch=} because the first task is still in progress" + ) + await _pending_task_duplicate_push_conditions[branch].wait() + get_logger().info(f"Finished waiting to process push event for {branch=} - continue with flow") + + try: + # get PR info for branch + provider = get_git_provider()() + provider.repo, _ = provider._parse_pr_url(body["repository"]["html_url"].rstrip("/") + "/pull/1") + get_logger().debug(f"[PUSH] {provider.repo=}") + github_repo = provider._get_repo() + default_branch = body["repository"]["default_branch"] + get_logger().debug(f"[PUSH] {default_branch=}") + org = body["repository"]["organization"] + get_logger().debug(f"[PUSH] {org=}") + pull_requests = list(github_repo.get_pulls(state="open", base=default_branch, head=":".join((org, branch)))) + get_logger().debug(f"[PUSH] {pull_requests=}") + if not pull_requests: + return {} + pull_request = pull_requests[0].raw_data + # check that the PR is valid to run the agent like in the pull_request event + if not pull_request: + return {} + api_url = pull_request.get("url") + get_logger().debug(f"[PUSH] {api_url=}") + if not api_url: + return {} + log_context["api_url"] = api_url + if pull_request.get("draft", True) or pull_request.get("state") != "open" or pull_request.get("user", {}).get("login", "") == bot_user: + return {} + if get_settings().github_app.push_event_wait_for_initial_review and not get_git_provider()(api_url, incremental=IncrementalPR(True)).previous_review: + get_logger().info(f"Skipping incremental review because there was no initial review for PR={api_url} yet") + return {} + get_logger().info(f"Performing incremental review because of event={event} and branch={branch} with PR={api_url}") + await _perform_commands(get_settings().github_app.push_commands, agent, body, api_url, log_context) + + finally: + # release the waiting task block + async with _pending_task_duplicate_push_conditions[branch]: + _pending_task_duplicate_push_conditions[branch].notify(1) + _duplicate_branch_push_triggers[branch] -= 1 get_logger().info("event or action does not require handling") return {} +async def _perform_commands(commands: List[str], agent: PRAgent, body: dict, api_url: str, log_context: dict): + apply_repo_settings(api_url) + for command in commands: + split_command = command.split(" ") + command = split_command[0] + args = split_command[1:] + other_args = update_settings_from_args(args) + new_command = ' '.join([command] + other_args) + get_logger().info(body) + get_logger().info(f"Performing command: {new_command}") + with get_logger().contextualize(**log_context): + await agent.handle_request(api_url, new_command) + + def _is_duplicate_request(body: Dict[str, Any]) -> bool: """ In some deployments its possible to get duplicate requests if the handling is long, @@ -150,13 +254,8 @@ def _is_duplicate_request(body: Dict[str, Any]) -> bool: """ request_hash = hash(str(body)) get_logger().info(f"request_hash: {request_hash}") - request_time = time.monotonic() - ttl = get_settings().github_app.duplicate_requests_cache_ttl # in seconds - to_delete = [key for key, key_time in _duplicate_requests_cache.items() if request_time - key_time > ttl] - for key in to_delete: - del _duplicate_requests_cache[key] - is_duplicate = request_hash in _duplicate_requests_cache - _duplicate_requests_cache[request_hash] = request_time + is_duplicate = _duplicate_requests_cache.get(request_hash, False) + _duplicate_requests_cache[request_hash] = True if is_duplicate: get_logger().info(f"Ignoring duplicate request {request_hash}") return is_duplicate diff --git a/pr_agent/servers/utils.py b/pr_agent/servers/utils.py index c24b880c..12dd85ae 100644 --- a/pr_agent/servers/utils.py +++ b/pr_agent/servers/utils.py @@ -1,5 +1,8 @@ import hashlib import hmac +import time +from collections import defaultdict +from typing import Callable, Any from fastapi import HTTPException @@ -25,3 +28,59 @@ def verify_signature(payload_body, secret_token, signature_header): class RateLimitExceeded(Exception): """Raised when the git provider API rate limit has been exceeded.""" pass + + +class DefaultDictWithTimeout(defaultdict): + """A defaultdict with a time-to-live (TTL).""" + + def __init__( + self, + default_factory: Callable[[], Any] = None, + ttl: int = None, + refresh_interval: int = 60, + update_key_time_on_get: bool = True, + *args, + **kwargs, + ): + """ + Args: + default_factory: The default factory to use for keys that are not in the dictionary. + ttl: The time-to-live (TTL) in seconds. + refresh_interval: How often to refresh the dict and delete items older than the TTL. + update_key_time_on_get: Whether to update the access time of a key also on get (or only when set). + """ + super().__init__(default_factory, *args, **kwargs) + self.__key_times = dict() + self.__ttl = ttl + self.__refresh_interval = refresh_interval + self.__update_key_time_on_get = update_key_time_on_get + self.__last_refresh = self.__time() - self.__refresh_interval + + @staticmethod + def __time(): + return time.monotonic() + + def __refresh(self): + if self.__ttl is None: + return + request_time = self.__time() + if request_time - self.__last_refresh > self.__refresh_interval: + return + to_delete = [key for key, key_time in self.__key_times.items() if request_time - key_time > self.__ttl] + for key in to_delete: + del self[key] + self.__last_refresh = request_time + + def __getitem__(self, __key): + if self.__update_key_time_on_get: + self.__key_times[__key] = self.__time() + self.__refresh() + return super().__getitem__(__key) + + def __setitem__(self, __key, __value): + self.__key_times[__key] = self.__time() + return super().__setitem__(__key, __value) + + def __delitem__(self, __key): + del self.__key_times[__key] + return super().__delitem__(__key) diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 6f44bc53..32fcfb83 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -83,6 +83,26 @@ pr_commands = [ "/describe --pr_description.add_original_user_description=true --pr_description.keep_original_user_title=true", "/auto_review", ] +# settings for "push" event +handle_push_event = false +push_event_ignore_bot_commits = true +push_event_ignore_merge_commits = true +push_event_wait_for_initial_review = true +push_event_pending_triggers_backlog = true +push_event_pending_triggers_ttl = 300 +push_commands = [ + "/describe --pr_description.add_original_user_description=true --pr_description.keep_original_user_title=true", + """/auto_review -i \ + --pr_reviewer.require_focused_review=false \ + --pr_reviewer.require_score_review=false \ + --pr_reviewer.require_tests_review=false \ + --pr_reviewer.require_security_review=false \ + --pr_reviewer.require_estimate_effort_to_review=false \ + --pr_reviewer.num_code_suggestions=0 \ + --pr_reviewer.inline_code_comments=false \ + --pr_reviewer.extra_instructions='' \ + """ +] [gitlab] # URL to the gitlab service diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index ed99ddf6..0cc695e3 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -98,6 +98,9 @@ class PRReviewer: if self.is_auto and not get_settings().pr_reviewer.automatic_review: get_logger().info(f'Automatic review is disabled {self.pr_url}') return None + if self.is_auto and self.incremental.is_incremental and not self.incremental.first_new_commit_sha: + get_logger().info(f"Incremental review is enabled for {self.pr_url} but there are no new commits") + return None get_logger().info(f'Reviewing PR: {self.pr_url} ...') @@ -228,9 +231,13 @@ class PRReviewer: if self.incremental.is_incremental: last_commit_url = f"{self.git_provider.get_pr_url()}/commits/" \ f"{self.git_provider.incremental.first_new_commit_sha}" + last_commit_msg = self.incremental.commits_range[0].commit.message if self.incremental.commits_range else "" + incremental_review_markdown_text = f"Starting from commit {last_commit_url}" + if last_commit_msg: + incremental_review_markdown_text += f" \n_({last_commit_msg.splitlines(keepends=False)[0]})_" data = OrderedDict(data) data.update({'Incremental PR Review': { - "⏮️ Review for commits since previous PR-Agent review": f"Starting from commit {last_commit_url}"}}) + "⏮️ Review for commits since previous PR-Agent review": incremental_review_markdown_text}}) data.move_to_end('Incremental PR Review', last=False) markdown_text = convert_to_markdown(data, self.git_provider.is_supported("gfm_markdown")) From 02570ea797623fc3edecad5fc40d4b829162f7e2 Mon Sep 17 00:00:00 2001 From: zmeir Date: Thu, 26 Oct 2023 11:24:24 +0300 Subject: [PATCH 2/8] Remove previous review comment on push event --- pr_agent/git_providers/bitbucket_provider.py | 7 ++++- pr_agent/git_providers/codecommit_provider.py | 3 ++ pr_agent/git_providers/gerrit_provider.py | 3 ++ pr_agent/git_providers/git_provider.py | 4 +++ pr_agent/git_providers/github_provider.py | 28 +++++++++++++------ pr_agent/git_providers/gitlab_provider.py | 8 +++++- pr_agent/git_providers/local_git_provider.py | 3 ++ pr_agent/settings/configuration.toml | 2 ++ pr_agent/tools/pr_reviewer.py | 26 ++++++++++++++++- 9 files changed, 73 insertions(+), 11 deletions(-) diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py index fef51794..fe416d5c 100644 --- a/pr_agent/git_providers/bitbucket_provider.py +++ b/pr_agent/git_providers/bitbucket_provider.py @@ -142,10 +142,15 @@ class BitbucketProvider(GitProvider): def remove_initial_comment(self): try: for comment in self.temp_comments: - self.pr.delete(f"comments/{comment}") + self.remove_comment(comment) except Exception as e: get_logger().exception(f"Failed to remove temp comments, error: {e}") + def remove_comment(self, comment): + try: + self.pr.delete(f"comments/{comment}") + except Exception as e: + get_logger().exception(f"Failed to remove comment, error: {e}") # funtion to create_inline_comment def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str): diff --git a/pr_agent/git_providers/codecommit_provider.py b/pr_agent/git_providers/codecommit_provider.py index 4e12f96e..a4836849 100644 --- a/pr_agent/git_providers/codecommit_provider.py +++ b/pr_agent/git_providers/codecommit_provider.py @@ -221,6 +221,9 @@ class CodeCommitProvider(GitProvider): def remove_initial_comment(self): return "" # not implemented yet + def remove_comment(self, comment): + return "" # not implemented yet + def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str): # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/codecommit/client/post_comment_for_compared_commit.html raise NotImplementedError("CodeCommit provider does not support publishing inline comments yet") diff --git a/pr_agent/git_providers/gerrit_provider.py b/pr_agent/git_providers/gerrit_provider.py index ae017fbd..fd67f5f0 100644 --- a/pr_agent/git_providers/gerrit_provider.py +++ b/pr_agent/git_providers/gerrit_provider.py @@ -396,5 +396,8 @@ class GerritProvider(GitProvider): # shutil.rmtree(self.repo_path) pass + def remove_comment(self, comment): + pass + def get_pr_branch(self): return self.repo.head diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py index c6740467..6f986cf0 100644 --- a/pr_agent/git_providers/git_provider.py +++ b/pr_agent/git_providers/git_provider.py @@ -71,6 +71,10 @@ class GitProvider(ABC): def remove_initial_comment(self): pass + @abstractmethod + def remove_comment(self, comment): + pass + @abstractmethod def get_languages(self): pass diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 7a47fbf5..88fb3863 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -50,7 +50,7 @@ class GithubProvider(GitProvider): def get_incremental_commits(self): self.commits = list(self.pr.get_commits()) - self.get_previous_review() + self.previous_review = self.get_previous_review(full=True, incremental=True) if self.previous_review: self.incremental.commits_range = self.get_commit_range() # Get all files changed during the commit range @@ -73,13 +73,19 @@ class GithubProvider(GitProvider): break return self.commits[first_new_commit_index:] - def get_previous_review(self): - self.previous_review = None - self.comments = list(self.pr.get_issue_comments()) + def get_previous_review(self, *, full: bool, incremental: bool): + if not (full or incremental): + raise ValueError("At least one of full or incremental must be True") + if not getattr(self, "comments", None): + self.comments = list(self.pr.get_issue_comments()) + prefixes = [] + if full: + prefixes.append("## PR Analysis") + if incremental: + prefixes.append("## Incremental PR Review") for index in range(len(self.comments) - 1, -1, -1): - if self.comments[index].body.startswith("## PR Analysis") or self.comments[index].body.startswith("## Incremental PR Review"): - self.previous_review = self.comments[index] - break + if any(self.comments[index].body.startswith(prefix) for prefix in prefixes): + return self.comments[index] def get_files(self): if self.incremental.is_incremental and self.file_set: @@ -218,10 +224,16 @@ class GithubProvider(GitProvider): try: for comment in getattr(self.pr, 'comments_list', []): if comment.is_temporary: - comment.delete() + self.remove_comment(comment) except Exception as e: get_logger().exception(f"Failed to remove initial comment, error: {e}") + def remove_comment(self, comment): + try: + comment.delete() + except Exception as e: + get_logger().exception(f"Failed to remove comment, error: {e}") + def get_title(self): return self.pr.title diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index 0d09e622..d36fecf9 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -287,10 +287,16 @@ class GitLabProvider(GitProvider): def remove_initial_comment(self): try: for comment in self.temp_comments: - comment.delete() + self.remove_comment(comment) except Exception as e: get_logger().exception(f"Failed to remove temp comments, error: {e}") + def remove_comment(self, comment): + try: + comment.delete() + except Exception as e: + get_logger().exception(f"Failed to remove comment, error: {e}") + def get_title(self): return self.mr.title diff --git a/pr_agent/git_providers/local_git_provider.py b/pr_agent/git_providers/local_git_provider.py index 5fa9f0be..0ef11413 100644 --- a/pr_agent/git_providers/local_git_provider.py +++ b/pr_agent/git_providers/local_git_provider.py @@ -140,6 +140,9 @@ class LocalGitProvider(GitProvider): def remove_initial_comment(self): pass # Not applicable to the local git provider, but required by the interface + def remove_comment(self, comment): + pass # Not applicable to the local git provider, but required by the interface + def get_languages(self): """ Calculate percentage of languages in repository. Used for hunk prioritisation. diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 32fcfb83..3fdcf93c 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -24,6 +24,7 @@ num_code_suggestions=4 inline_code_comments = false ask_and_reflect=false automatic_review=true +remove_previous_review_comment=false extra_instructions = "" [pr_description] # /describe # @@ -100,6 +101,7 @@ push_commands = [ --pr_reviewer.require_estimate_effort_to_review=false \ --pr_reviewer.num_code_suggestions=0 \ --pr_reviewer.inline_code_comments=false \ + --pr_reviewer.remove_previous_review_comment=true \ --pr_reviewer.extra_instructions='' \ """ ] diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index 0cc695e3..b13b5e17 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -114,9 +114,10 @@ class PRReviewer: if get_settings().config.publish_output: get_logger().info('Pushing PR review...') + previous_review_comment = self._get_previous_review_comment() self.git_provider.publish_comment(pr_comment) self.git_provider.remove_initial_comment() - + self._remove_previous_review_comment(previous_review_comment) if get_settings().pr_reviewer.inline_code_comments: get_logger().info('Pushing inline code comments...') self._publish_inline_code_comments() @@ -318,3 +319,26 @@ class PRReviewer: break return question_str, answer_str + + def _get_previous_review_comment(self): + """ + Get the previous review comment if it exists. + """ + try: + if get_settings().pr_reviewer.remove_previous_review_comment and hasattr(self.git_provider, "get_previous_review"): + return self.git_provider.get_previous_review( + full=not self.incremental.is_incremental, + incremental=self.incremental.is_incremental, + ) + except Exception as e: + get_logger().exception(f"Failed to get previous review comment, error: {e}") + + def _remove_previous_review_comment(self, comment): + """ + Get the previous review comment if it exists. + """ + try: + if get_settings().pr_reviewer.remove_previous_review_comment and comment: + self.git_provider.remove_comment(comment) + except Exception as e: + get_logger().exception(f"Failed to remove previous review comment, error: {e}") From 6541575a0ea499c4e5771f9e5457902486bddb8a Mon Sep 17 00:00:00 2001 From: zmeir Date: Thu, 26 Oct 2023 14:45:57 +0300 Subject: [PATCH 3/8] Refactor to use pull_request synchronize event --- pr_agent/servers/github_app.py | 148 +++++++++++---------------- pr_agent/settings/configuration.toml | 14 +-- 2 files changed, 66 insertions(+), 96 deletions(-) diff --git a/pr_agent/servers/github_app.py b/pr_agent/servers/github_app.py index 7039e178..2bb85a3d 100644 --- a/pr_agent/servers/github_app.py +++ b/pr_agent/servers/github_app.py @@ -1,7 +1,7 @@ import copy import os import asyncio.locks -from typing import Any, Dict, List +from typing import Any, Dict, List, Tuple import uvicorn from fastapi import APIRouter, FastAPI, HTTPException, Request, Response @@ -64,8 +64,8 @@ async def get_body(request): _duplicate_requests_cache = DefaultDictWithTimeout(ttl=get_settings().github_app.duplicate_requests_cache_ttl) -_duplicate_branch_push_triggers = DefaultDictWithTimeout(ttl=get_settings().github_app.push_event_pending_triggers_ttl) -_pending_task_duplicate_push_conditions = DefaultDictWithTimeout(asyncio.locks.Condition, ttl=get_settings().github_app.push_event_pending_triggers_ttl) +_duplicate_push_triggers = DefaultDictWithTimeout(ttl=get_settings().github_app.push_trigger_pending_tasks_ttl) +_pending_task_duplicate_push_conditions = DefaultDictWithTimeout(asyncio.locks.Condition, ttl=get_settings().github_app.push_trigger_pending_tasks_ttl) async def handle_request(body: Dict[str, Any], event: str): @@ -77,7 +77,7 @@ async def handle_request(body: Dict[str, Any], event: str): event: The GitHub event type. """ action = body.get("action") - if not (action or event == "push"): + if not action: return {} agent = PRAgent() bot_user = get_settings().github_app.bot_user @@ -113,126 +113,96 @@ async def handle_request(body: Dict[str, Any], event: str): # handle pull_request event: # automatically review opened/reopened/ready_for_review PRs as long as they're not in draft, # as well as direct review requests from the bot - elif event == 'pull_request': - pull_request = body.get("pull_request") - if not pull_request: - return {} - api_url = pull_request.get("url") - if not api_url: - return {} - log_context["api_url"] = api_url - if pull_request.get("draft", True) or pull_request.get("state") != "open" or pull_request.get("user", {}).get("login", "") == bot_user: + elif event == 'pull_request' and action != 'synchronize': + pull_request, api_url = _check_pull_request_event(action, body, log_context, bot_user) + if not (pull_request and api_url): return {} if action in get_settings().github_app.handle_pr_actions: if action == "review_requested": if body.get("requested_reviewer", {}).get("login", "") != bot_user: return {} - if pull_request.get("created_at") == pull_request.get("updated_at"): - # avoid double reviews when opening a PR for the first time - return {} - get_logger().info(f"Performing review because of event={event} and action={action}") + get_logger().info(f"Performing review for {api_url=} because of {event=} and {action=}") await _perform_commands(get_settings().github_app.pr_commands, agent, body, api_url, log_context) - # handle push event for new commits - elif event == "push" and get_settings().github_app.handle_push_event: - get_logger().debug(f"[PUSH] {body=}") - # get the branch name - ref = body.get("ref") - get_logger().debug(f"[PUSH] {ref=}") - if not (ref and ref.startswith("refs/heads/")): + # handle pull_request event with synchronize action - "push trigger" for new commits + elif event == 'pull_request' and action == 'synchronize' and get_settings().github_app.handle_push_trigger: + pull_request, api_url = _check_pull_request_event(action, body, log_context, bot_user) + if not (pull_request and api_url): return {} - branch = ref.removeprefix("refs/heads/") - get_logger().debug(f"[PUSH] {branch=}") - # skip first push (PR will follow) - if body.get("created"): - get_logger().debug("[PUSH] skipping first push") - return {} - # skip if no relevant commits (e.g. no commits at all, only bot/merge commits) - commits = body.get("commits", []) - get_logger().debug(f"[PUSH] {len(commits)} {commits=}") - bot_commits = [commit for commit in commits if commit.get("author", {}).get("username", "") == bot_user] - get_logger().debug(f"[PUSH] {len(bot_commits)} {bot_commits=}") - merge_commits = [commit for commit in commits if commit.get("message", "").startswith("Merge branch ")] - get_logger().debug(f"[PUSH] {len(merge_commits)} {merge_commits=}") - commit_ids_to_ignore = set() - if get_settings().github_app.push_event_ignore_bot_commits: - commit_ids_to_ignore.update({commit.get("id") for commit in bot_commits}) - if get_settings().github_app.push_event_ignore_bot_commits: - commit_ids_to_ignore.update({commit.get("id") for commit in merge_commits}) - commits = [commit for commit in commits if commit.get("id") not in commit_ids_to_ignore] - if not commits: - return {} - # TODO: consider adding some custom prompt to instruct the PR-Agent how to address bot commits - # Prevent triggering multiple times for subsequent push events when one is enough: - # The first event will trigger the processing, and if there's a second event in the meanwhile it will wait. + # TODO: do we still want to get the list of commits to filter bot/merge commits? + before_sha = body.get("before") + after_sha = body.get("after") + merge_commit_sha = pull_request.get("merge_commit_sha") + if before_sha == after_sha: + return {} + if get_settings().github_app.push_trigger_ignore_merge_commits and after_sha == merge_commit_sha: + return {} + if get_settings().github_app.push_trigger_ignore_bot_commits and body.get("sender", {}).get("login", "") == bot_user: + return {} + + # Prevent triggering multiple times for subsequent push triggers when one is enough: + # The first push will trigger the processing, and if there's a second push in the meanwhile it will wait. # Any more events will be discarded, because they will all trigger the exact same processing on the PR. # We let the second event wait instead of discarding it because while the first event was being processed, - # more commits may have been pushed that led to the subsequent push events, + # more commits may have been pushed that led to the subsequent events, # so we keep just one waiting as a delegate to trigger the processing for the new commits when done waiting. - current_active_tasks = _duplicate_branch_push_triggers.setdefault(branch, 0) - max_active_tasks = 2 if get_settings().github_app.push_event_pending_triggers_backlog else 1 + current_active_tasks = _duplicate_push_triggers.setdefault(api_url, 0) + max_active_tasks = 2 if get_settings().github_app.push_trigger_pending_tasks_backlog else 1 if current_active_tasks < max_active_tasks: - # first and second tasks can enter + # first task can enter, and second tasks too if backlog is enabled get_logger().info( - f"Continue processing push event for {branch=} because there are {current_active_tasks} active tasks" + f"Continue processing push trigger for {api_url=} because there are {current_active_tasks} active tasks" ) - _duplicate_branch_push_triggers[branch] += 1 + _duplicate_push_triggers[api_url] += 1 else: get_logger().info( - f"Skipping push event for {branch=} because another event already triggered the same processing" + f"Skipping push trigger for {api_url=} because another event already triggered the same processing" ) return {} - async with _pending_task_duplicate_push_conditions[branch]: + async with _pending_task_duplicate_push_conditions[api_url]: if current_active_tasks == 1: # second task waits get_logger().info( - f"Waiting to process push event for {branch=} because the first task is still in progress" + f"Waiting to process push trigger for {api_url=} because the first task is still in progress" ) - await _pending_task_duplicate_push_conditions[branch].wait() - get_logger().info(f"Finished waiting to process push event for {branch=} - continue with flow") + await _pending_task_duplicate_push_conditions[api_url].wait() + get_logger().info(f"Finished waiting to process push trigger for {api_url=} - continue with flow") try: - # get PR info for branch - provider = get_git_provider()() - provider.repo, _ = provider._parse_pr_url(body["repository"]["html_url"].rstrip("/") + "/pull/1") - get_logger().debug(f"[PUSH] {provider.repo=}") - github_repo = provider._get_repo() - default_branch = body["repository"]["default_branch"] - get_logger().debug(f"[PUSH] {default_branch=}") - org = body["repository"]["organization"] - get_logger().debug(f"[PUSH] {org=}") - pull_requests = list(github_repo.get_pulls(state="open", base=default_branch, head=":".join((org, branch)))) - get_logger().debug(f"[PUSH] {pull_requests=}") - if not pull_requests: + if get_settings().github_app.push_trigger_wait_for_initial_review and not get_git_provider()(api_url, incremental=IncrementalPR(True)).previous_review: + get_logger().info(f"Skipping incremental review because there was no initial review for {api_url=} yet") return {} - pull_request = pull_requests[0].raw_data - # check that the PR is valid to run the agent like in the pull_request event - if not pull_request: - return {} - api_url = pull_request.get("url") - get_logger().debug(f"[PUSH] {api_url=}") - if not api_url: - return {} - log_context["api_url"] = api_url - if pull_request.get("draft", True) or pull_request.get("state") != "open" or pull_request.get("user", {}).get("login", "") == bot_user: - return {} - if get_settings().github_app.push_event_wait_for_initial_review and not get_git_provider()(api_url, incremental=IncrementalPR(True)).previous_review: - get_logger().info(f"Skipping incremental review because there was no initial review for PR={api_url} yet") - return {} - get_logger().info(f"Performing incremental review because of event={event} and branch={branch} with PR={api_url}") + get_logger().info(f"Performing incremental review for {api_url=} because of {event=} and {action=}") await _perform_commands(get_settings().github_app.push_commands, agent, body, api_url, log_context) finally: # release the waiting task block - async with _pending_task_duplicate_push_conditions[branch]: - _pending_task_duplicate_push_conditions[branch].notify(1) - _duplicate_branch_push_triggers[branch] -= 1 + async with _pending_task_duplicate_push_conditions[api_url]: + _pending_task_duplicate_push_conditions[api_url].notify(1) + _duplicate_push_triggers[api_url] -= 1 get_logger().info("event or action does not require handling") return {} +def _check_pull_request_event(action: str, body: dict, log_context: dict, bot_user: str) -> Tuple[Dict[str, Any], str]: + invalid_result = {}, "" + pull_request = body.get("pull_request") + if not pull_request: + return invalid_result + api_url = pull_request.get("url") + if not api_url: + return invalid_result + log_context["api_url"] = api_url + if pull_request.get("draft", True) or pull_request.get("state") != "open" or pull_request.get("user", {}).get("login", "") == bot_user: + return invalid_result + if action in ("review_requested", "synchronize") and pull_request.get("created_at") == pull_request.get("updated_at"): + # avoid double reviews when opening a PR for the first time + return invalid_result + return pull_request, api_url + + async def _perform_commands(commands: List[str], agent: PRAgent, body: dict, api_url: str, log_context: dict): apply_repo_settings(api_url) for command in commands: diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 3fdcf93c..525cce7d 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -84,13 +84,13 @@ pr_commands = [ "/describe --pr_description.add_original_user_description=true --pr_description.keep_original_user_title=true", "/auto_review", ] -# settings for "push" event -handle_push_event = false -push_event_ignore_bot_commits = true -push_event_ignore_merge_commits = true -push_event_wait_for_initial_review = true -push_event_pending_triggers_backlog = true -push_event_pending_triggers_ttl = 300 +# settings for "pull_request" event with "synchronize" action - used to detect and handle push triggers for new commits +handle_push_trigger = false +push_trigger_ignore_bot_commits = true +push_trigger_ignore_merge_commits = true +push_trigger_wait_for_initial_review = true +push_trigger_pending_tasks_backlog = true +push_trigger_pending_tasks_ttl = 300 push_commands = [ "/describe --pr_description.add_original_user_description=true --pr_description.keep_original_user_title=true", """/auto_review -i \ From 414f2b6767e568977f0b622d843b4269721f1857 Mon Sep 17 00:00:00 2001 From: zmeir Date: Thu, 26 Oct 2023 16:46:01 +0300 Subject: [PATCH 4/8] Fix incremental review if there are no new commits (would have performed a full review instead) --- pr_agent/git_providers/github_provider.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 88fb3863..4fce1133 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -63,7 +63,7 @@ class GithubProvider(GitProvider): def get_commit_range(self): last_review_time = self.previous_review.created_at - first_new_commit_index = 0 + first_new_commit_index = None for index in range(len(self.commits) - 1, -1, -1): if self.commits[index].commit.author.date > last_review_time: self.incremental.first_new_commit_sha = self.commits[index].sha @@ -71,7 +71,7 @@ class GithubProvider(GitProvider): else: self.incremental.last_seen_commit_sha = self.commits[index].sha break - return self.commits[first_new_commit_index:] + return self.commits[first_new_commit_index:] if first_new_commit_index is not None else [] def get_previous_review(self, *, full: bool, incremental: bool): if not (full or incremental): From e6bea76eee744c74996d827717336eaae1e83480 Mon Sep 17 00:00:00 2001 From: Zohar Meir <33152084+zmeir@users.noreply.github.com> Date: Thu, 26 Oct 2023 17:07:16 +0300 Subject: [PATCH 5/8] Typo --- pr_agent/tools/pr_reviewer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index b13b5e17..c8081fde 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -335,7 +335,7 @@ class PRReviewer: def _remove_previous_review_comment(self, comment): """ - Get the previous review comment if it exists. + Remove the previous review comment if it exists. """ try: if get_settings().pr_reviewer.remove_previous_review_comment and comment: From 013a689b332ac8b32d76691f66291919712e69b2 Mon Sep 17 00:00:00 2001 From: "Hussam.lawen" Date: Sun, 29 Oct 2023 10:43:04 +0200 Subject: [PATCH 6/8] generate_labels fix --- docs/GENERATE_CUSTOM_LABELS.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/GENERATE_CUSTOM_LABELS.md b/docs/GENERATE_CUSTOM_LABELS.md index 5c1743f4..b5b555f7 100644 --- a/docs/GENERATE_CUSTOM_LABELS.md +++ b/docs/GENERATE_CUSTOM_LABELS.md @@ -1,16 +1,16 @@ # Generate Custom Labels -The `generte_labels` tool scans the PR code changes, and given a list of labels and their descriptions, it automatically suggests labels that match the PR code changes. +The `generate_labels` tool scans the PR code changes, and given a list of labels and their descriptions, it automatically suggests labels that match the PR code changes. It can be invoked manually by commenting on any PR: ``` -/generte_labels +/generate_labels ``` For example: If we wish to add detect changes to SQL queries in a given PR, we can add the following custom label along with its description: -When running the `generte_labels` tool on a PR that includes changes in SQL queries, it will automatically suggest the custom label: +When running the `generate_labels` tool on a PR that includes changes in SQL queries, it will automatically suggest the custom label: ### Configuration options @@ -24,7 +24,7 @@ description = "Description of when AI should suggest this label" - You can add modify the list to include all the custom labels you wish to use in your repository. #### Github Action -To use the `generte_labels` tool with Github Action: +To use the `generate_labels` tool with Github Action: - Add the following file to your repository under `env` section in `.github/workflows/pr_agent.yml` - Comma separated list of custom labels and their descriptions From 3434296792ce6114fb8cf3c74b45b43f84df29b1 Mon Sep 17 00:00:00 2001 From: "Hussam.lawen" Date: Sun, 29 Oct 2023 13:02:07 +0200 Subject: [PATCH 7/8] Documentation --- docs/GENERATE_CUSTOM_LABELS.md | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/GENERATE_CUSTOM_LABELS.md b/docs/GENERATE_CUSTOM_LABELS.md index b5b555f7..5bab2e33 100644 --- a/docs/GENERATE_CUSTOM_LABELS.md +++ b/docs/GENERATE_CUSTOM_LABELS.md @@ -13,23 +13,23 @@ If we wish to add detect changes to SQL queries in a given PR, we can add the fo When running the `generate_labels` tool on a PR that includes changes in SQL queries, it will automatically suggest the custom label: -### Configuration options -To enable custom labels, you need to add the following configuration to the [custom_labels file](./../pr_agent/settings/custom_labels.toml): +### How to enable custom labels +#### CLI +To enable custom labels, you need to apply the [configuration changes](#configuration-changes) to the [custom_labels file](./../pr_agent/settings/custom_labels.toml): +#### Github Action and Gihub App +To enable custom labels, you need to apply the [configuration changes](#configuration-changes) to the `.pr_agent.toml` file in you repository. + +#### Configuration changes - Change `enable_custom_labels` to True: This will turn off the default labels and enable the custom labels provided in the custom_labels.toml file. - - Add the custom labels to the custom_labels.toml file. It should be formatted as follows: + - Add the custom labels. It should be formatted as follows: ``` +[config] +enable_custom_labels=true + [custom_labels."Custom Label Name"] description = "Description of when AI should suggest this label" -``` - - You can add modify the list to include all the custom labels you wish to use in your repository. -#### Github Action -To use the `generate_labels` tool with Github Action: - -- Add the following file to your repository under `env` section in `.github/workflows/pr_agent.yml` -- Comma separated list of custom labels and their descriptions -- The number of labels and descriptions should be the same and in the same order (empty descriptions are allowed): +[custom_labels."Custom Label 2"] +description = "Description of when AI should suggest this label 2" ``` -CUSTOM_LABELS: "label1, label2, ..." -CUSTOM_LABELS_DESCRIPTION: "label1 description, label2 description, ..." -``` \ No newline at end of file + From 49e3d5ec5f17d890b631fd4602525960612ea5a5 Mon Sep 17 00:00:00 2001 From: "Hussam.lawen" Date: Sun, 29 Oct 2023 13:58:01 +0200 Subject: [PATCH 8/8] Add documentation --- README.md | 3 +++ docs/DESCRIBE.md | 2 +- docs/GENERATE_CUSTOM_LABELS.md | 3 ++- docs/REVIEW.md | 1 + docs/TOOLS_GUIDE.md | 1 + 5 files changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 26cb77d1..2c459ae5 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,8 @@ CodiumAI `PR-Agent` is an open-source tool aiming to help developers review pull ‣ **Find Similar Issue ([`/similar_issue`](./docs/SIMILAR_ISSUE.md))**: Automatically retrieves and presents similar issues \ ‣ **Add Documentation ([`/add_docs`](./docs/ADD_DOCUMENTATION.md))**: Automatically adds documentation to un-documented functions/classes in the PR. +\ +‣ **Generate Custom Labels ([`/generate_labels`](./docs/GENERATE_CUSTOM_LABELS.md))**: Automatically suggests custom labels based on the PR code changes. See the [Installation Guide](./INSTALL.md) for instructions how to install and run the tool on different platforms. @@ -115,6 +117,7 @@ See the [Tools Guide](./docs/TOOLS_GUIDE.md) for detailed description of the dif | | Update CHANGELOG.md | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | | | | Find similar issue | :white_check_mark: | | | | | | | | Add Documentation | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | :white_check_mark: | +| | Generate Labels | :white_check_mark: | :white_check_mark: | | | | | | | | | | | | | | USAGE | CLI | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | | App / webhook | :white_check_mark: | :white_check_mark: | | | | diff --git a/docs/DESCRIBE.md b/docs/DESCRIBE.md index 3db21a13..d9402dec 100644 --- a/docs/DESCRIBE.md +++ b/docs/DESCRIBE.md @@ -26,7 +26,7 @@ Under the section 'pr_description', the [configuration file](./../pr_agent/setti - `keep_original_user_title`: if set to true, the tool will keep the original PR title, and won't change it. Default is false. - `extra_instructions`: Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ...". - +- To enable `custom labels`, apply the configuration changes described [here](./GENERATE_CUSTOM_LABELS.md#configuration-changes) ### Markers template markers enable to easily integrate user's content and auto-generated content, with a template-like mechanism. diff --git a/docs/GENERATE_CUSTOM_LABELS.md b/docs/GENERATE_CUSTOM_LABELS.md index 5bab2e33..596ab5ba 100644 --- a/docs/GENERATE_CUSTOM_LABELS.md +++ b/docs/GENERATE_CUSTOM_LABELS.md @@ -22,7 +22,8 @@ To enable custom labels, you need to apply the [configuration changes](#configur #### Configuration changes - Change `enable_custom_labels` to True: This will turn off the default labels and enable the custom labels provided in the custom_labels.toml file. - Add the custom labels. It should be formatted as follows: - ``` + +``` [config] enable_custom_labels=true diff --git a/docs/REVIEW.md b/docs/REVIEW.md index 5445ccdd..7f2f42cb 100644 --- a/docs/REVIEW.md +++ b/docs/REVIEW.md @@ -25,6 +25,7 @@ Under the section 'pr_reviewer', the [configuration file](./../pr_agent/settings - `inline_code_comments`: if set to true, the tool will publish the code suggestions as comments on the code diff. Default is false. - `automatic_review`: if set to false, no automatic reviews will be done. Default is true. - `extra_instructions`: Optional extra instructions to the tool. For example: "focus on the changes in the file X. Ignore change in ...". +- To enable `custom labels`, apply the configuration changes described [here](./GENERATE_CUSTOM_LABELS.md#configuration-changes) #### Incremental Mode For an incremental review, which only considers changes since the last PR-Agent review, this can be useful when working on the PR in an iterative manner, and you want to focus on the changes since the last review instead of reviewing the entire PR again, the following command can be used: ``` diff --git a/docs/TOOLS_GUIDE.md b/docs/TOOLS_GUIDE.md index b3831961..545f6b27 100644 --- a/docs/TOOLS_GUIDE.md +++ b/docs/TOOLS_GUIDE.md @@ -6,5 +6,6 @@ - [SIMILAR_ISSUE](./SIMILAR_ISSUE.md) - [UPDATE CHANGELOG](./UPDATE_CHANGELOG.md) - [ADD DOCUMENTATION](./ADD_DOCUMENTATION.md) +- [GENERATE CUSTOM LABELS](./GENERATE_CUSTOM_LABELS.md) See the **[installation guide](/INSTALL.md)** for instructions on how to setup PR-Agent. \ No newline at end of file