diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index c818b6d3..39927c45 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -34,8 +34,10 @@ class GithubProvider(GitProvider): self.incremental = incremental if pr_url and 'pull' in pr_url: self.set_pr(pr_url) - self.pr_commits = self.pr.get_commits() - self.last_commit_id = list(self.pr_commits)[-1] + self.pr_commits = list(self.pr.get_commits()) + if self.incremental.is_incremental: + self.get_incremental_commits() + self.last_commit_id = self.pr_commits[-1] self.pr_url = self.get_pr_url() # pr_url for github actions can be as api.github.com, so we need to get the url from the pr object else: self.pr_commits = None @@ -49,8 +51,6 @@ class GithubProvider(GitProvider): def set_pr(self, pr_url: str): self.repo, self.pr_num = self._parse_pr_url(pr_url) self.pr = self._get_pr() - if self.incremental.is_incremental: - self.get_incremental_commits() def get_incremental_commits(self): if not self.pr_commits: @@ -88,7 +88,7 @@ class GithubProvider(GitProvider): self.comments = list(self.pr.get_issue_comments()) prefixes = [] if full: - prefixes.append("## PR Analysis") + prefixes.append("## PR Review") if incremental: prefixes.append("## Incremental PR Review") for index in range(len(self.comments) - 1, -1, -1): diff --git a/pr_agent/servers/github_app.py b/pr_agent/servers/github_app.py index 723ee207..2191c4d6 100644 --- a/pr_agent/servers/github_app.py +++ b/pr_agent/servers/github_app.py @@ -35,7 +35,6 @@ async def handle_github_webhooks(request: Request, response: Response): body = await get_body(request) - get_logger().debug(f'Request body:\n{body}') installation_id = body.get("installation", {}).get("id") context["installation_id"] = installation_id context["settings"] = copy.deepcopy(global_settings) @@ -64,10 +63,133 @@ async def get_body(request): return body -_duplicate_requests_cache = DefaultDictWithTimeout(ttl=get_settings().github_app.duplicate_requests_cache_ttl) _duplicate_push_triggers = DefaultDictWithTimeout(ttl=get_settings().github_app.push_trigger_pending_tasks_ttl) _pending_task_duplicate_push_conditions = DefaultDictWithTimeout(asyncio.locks.Condition, ttl=get_settings().github_app.push_trigger_pending_tasks_ttl) +async def handle_comments_on_pr(body: Dict[str, Any], + event: str, + sender: str, + action: str, + log_context: Dict[str, Any], + agent: PRAgent): + if "comment" not in body: + return {} + comment_body = body.get("comment", {}).get("body") + if comment_body and isinstance(comment_body, str) and not comment_body.lstrip().startswith("/"): + get_logger().info("Ignoring comment not starting with /") + return {} + disable_eyes = False + if "issue" in body and "pull_request" in body["issue"] and "url" in body["issue"]["pull_request"]: + api_url = body["issue"]["pull_request"]["url"] + elif "comment" in body and "pull_request_url" in body["comment"]: + api_url = body["comment"]["pull_request_url"] + try: + if ('/ask' in comment_body and + 'subject_type' in body["comment"] and body["comment"]["subject_type"] == "line"): + # comment on a code line in the "files changed" tab + comment_body = handle_line_comments(body, comment_body) + disable_eyes = True + except Exception as e: + get_logger().error(f"Failed to handle line comments: {e}") + else: + return {} + log_context["api_url"] = api_url + comment_id = body.get("comment", {}).get("id") + provider = get_git_provider()(pr_url=api_url) + with get_logger().contextualize(**log_context): + get_logger().info(f"Processing comment on PR {api_url=}, comment_body={comment_body}") + await agent.handle_request(api_url, comment_body, + notify=lambda: provider.add_eyes_reaction(comment_id, disable_eyes=disable_eyes)) + +async def handle_new_pr_opened(body: Dict[str, Any], + event: str, + sender: str, + action: str, + log_context: Dict[str, Any], + agent: PRAgent): + title = body.get("pull_request", {}).get("title", "") + + # logic to ignore PRs with specific titles (e.g. "[Auto] ...") + ignore_pr_title_re = get_settings().get("GITHUB_APP.IGNORE_PR_TITLE", []) + if not isinstance(ignore_pr_title_re, list): + ignore_pr_title_re = [ignore_pr_title_re] + if ignore_pr_title_re and any(re.search(regex, title) for regex in ignore_pr_title_re): + get_logger().info(f"Ignoring PR with title '{title}' due to github_app.ignore_pr_title setting") + return {} + + pull_request, api_url = _check_pull_request_event(action, body, log_context) + if not (pull_request and api_url): + get_logger().info(f"Invalid PR event: {action=} {api_url=}") + return {} + if action in get_settings().github_app.handle_pr_actions: # ['opened', 'reopened', 'ready_for_review', 'review_requested'] + await _perform_auto_commands_github("pr_commands", agent, body, api_url, log_context) + +async def handle_push_trigger_for_new_commits(body: Dict[str, Any], + event: str, + sender: str, + action: str, + log_context: Dict[str, Any], + agent: PRAgent): + pull_request, api_url = _check_pull_request_event(action, body, log_context) + if not (pull_request and api_url): + return {} + + apply_repo_settings(api_url) # we need to apply the repo settings to get the correct settings for the PR. This is quite expensive - a call to the git provider is made for each PR event. + if not get_settings().github_app.handle_push_trigger: + return {} + + # TODO: do we still want to get the list of commits to filter bot/merge commits? + before_sha = body.get("before") + after_sha = body.get("after") + merge_commit_sha = pull_request.get("merge_commit_sha") + if before_sha == after_sha: + return {} + if get_settings().github_app.push_trigger_ignore_merge_commits and after_sha == merge_commit_sha: + return {} + + # Prevent triggering multiple times for subsequent push triggers when one is enough: + # The first push will trigger the processing, and if there's a second push in the meanwhile it will wait. + # Any more events will be discarded, because they will all trigger the exact same processing on the PR. + # We let the second event wait instead of discarding it because while the first event was being processed, + # more commits may have been pushed that led to the subsequent events, + # so we keep just one waiting as a delegate to trigger the processing for the new commits when done waiting. + current_active_tasks = _duplicate_push_triggers.setdefault(api_url, 0) + max_active_tasks = 2 if get_settings().github_app.push_trigger_pending_tasks_backlog else 1 + if current_active_tasks < max_active_tasks: + # first task can enter, and second tasks too if backlog is enabled + get_logger().info( + f"Continue processing push trigger for {api_url=} because there are {current_active_tasks} active tasks" + ) + _duplicate_push_triggers[api_url] += 1 + else: + get_logger().info( + f"Skipping push trigger for {api_url=} because another event already triggered the same processing" + ) + return {} + async with _pending_task_duplicate_push_conditions[api_url]: + if current_active_tasks == 1: + # second task waits + get_logger().info( + f"Waiting to process push trigger for {api_url=} because the first task is still in progress" + ) + await _pending_task_duplicate_push_conditions[api_url].wait() + get_logger().info(f"Finished waiting to process push trigger for {api_url=} - continue with flow") + + try: + if get_settings().github_app.push_trigger_wait_for_initial_review and not get_git_provider()(api_url, + incremental=IncrementalPR( + True)).previous_review: + get_logger().info(f"Skipping incremental review because there was no initial review for {api_url=} yet") + return {} + get_logger().info(f"Performing incremental review for {api_url=} because of {event=} and {action=}") + await _perform_auto_commands_github("push_commands", agent, body, api_url, log_context) + + finally: + # release the waiting task block + async with _pending_task_duplicate_push_conditions[api_url]: + _pending_task_duplicate_push_conditions[api_url].notify(1) + _duplicate_push_triggers[api_url] -= 1 + async def handle_request(body: Dict[str, Any], event: str): """ @@ -75,144 +197,35 @@ async def handle_request(body: Dict[str, Any], event: str): Args: body: The request body. - event: The GitHub event type. + event: The GitHub event type (e.g. "pull_request", "issue_comment", etc.). """ - action = body.get("action") + action = body.get("action") # "created", "opened", "reopened", "ready_for_review", "review_requested", "synchronize" if not action: return {} agent = PRAgent() - bot_user = get_settings().github_app.bot_user sender = body.get("sender", {}).get("login") log_context = {"action": action, "event": event, "sender": sender, "server_type": "github_app"} - if get_settings().github_app.duplicate_requests_cache and _is_duplicate_request(body): - return {} - - # handle all sorts of comment events (e.g. issue_comment) + # handle comments on PRs if action == 'created': - if "comment" not in body: - return {} - comment_body = body.get("comment", {}).get("body") - if comment_body and isinstance(comment_body, str) and not comment_body.lstrip().startswith("/"): - get_logger().info("Ignoring comment not starting with /") - return {} - if sender and bot_user in sender: - get_logger().info(f"Ignoring comment from {bot_user} user") - return {} - get_logger().info(f"Processing comment from {sender} user") - disable_eyes = False - if "issue" in body and "pull_request" in body["issue"] and "url" in body["issue"]["pull_request"]: - api_url = body["issue"]["pull_request"]["url"] - elif "comment" in body and "pull_request_url" in body["comment"]: - api_url = body["comment"]["pull_request_url"] - try: - if ('/ask' in comment_body and - 'subject_type' in body["comment"] and body["comment"]["subject_type"] == "line"): - comment_body = handle_line_comments(body, comment_body) - disable_eyes = True - except Exception as e: - get_logger().error(f"Failed to handle line comments: {e}") - - else: - return {} - log_context["api_url"] = api_url - get_logger().info(body) - get_logger().info(f"Handling comment because of event={event} and action={action}") - comment_id = body.get("comment", {}).get("id") - provider = get_git_provider()(pr_url=api_url) - with get_logger().contextualize(**log_context): - await agent.handle_request(api_url, comment_body, - notify=lambda: provider.add_eyes_reaction(comment_id, disable_eyes=disable_eyes)) - - # handle pull_request event: - # automatically review opened/reopened/ready_for_review PRs as long as they're not in draft, - # as well as direct review requests from the bot + get_logger().debug(f'Request body:\n{body}') + await handle_comments_on_pr(body, event, sender, action, log_context, agent) + # handle new PRs elif event == 'pull_request' and action != 'synchronize': - title = body.get("pull_request", {}).get("title", "") - ignore_pr_title_re = get_settings().get("GITHUB_APP.IGNORE_PR_TITLE", []) - if not isinstance(ignore_pr_title_re, list): - ignore_pr_title_re = [ignore_pr_title_re] - if ignore_pr_title_re and any(re.search(regex, title) for regex in ignore_pr_title_re): - get_logger().info(f"Ignoring PR with title '{title}' due to github_app.ignore_pr_title setting") - return {} - pull_request, api_url = _check_pull_request_event(action, body, log_context, bot_user) - if not (pull_request and api_url): - return {} - if action in get_settings().github_app.handle_pr_actions: - if action == "review_requested": - if body.get("requested_reviewer", {}).get("login", "") != bot_user: - return {} - get_logger().info(f"Performing review for {api_url=} because of {event=} and {action=}") - await _perform_commands("pr_commands", agent, body, api_url, log_context) - + get_logger().debug(f'Request body:\n{body}') + await handle_new_pr_opened(body, event, sender, action, log_context, agent) # handle pull_request event with synchronize action - "push trigger" for new commits elif event == 'pull_request' and action == 'synchronize': - pull_request, api_url = _check_pull_request_event(action, body, log_context, bot_user) - if not (pull_request and api_url): - return {} - - apply_repo_settings(api_url) - if not get_settings().github_app.handle_push_trigger: - return {} - - # TODO: do we still want to get the list of commits to filter bot/merge commits? - before_sha = body.get("before") - after_sha = body.get("after") - merge_commit_sha = pull_request.get("merge_commit_sha") - if before_sha == after_sha: - return {} - if get_settings().github_app.push_trigger_ignore_merge_commits and after_sha == merge_commit_sha: - return {} - if get_settings().github_app.push_trigger_ignore_bot_commits and body.get("sender", {}).get("login", "") == bot_user: - return {} - - # Prevent triggering multiple times for subsequent push triggers when one is enough: - # The first push will trigger the processing, and if there's a second push in the meanwhile it will wait. - # Any more events will be discarded, because they will all trigger the exact same processing on the PR. - # We let the second event wait instead of discarding it because while the first event was being processed, - # more commits may have been pushed that led to the subsequent events, - # so we keep just one waiting as a delegate to trigger the processing for the new commits when done waiting. - current_active_tasks = _duplicate_push_triggers.setdefault(api_url, 0) - max_active_tasks = 2 if get_settings().github_app.push_trigger_pending_tasks_backlog else 1 - if current_active_tasks < max_active_tasks: - # first task can enter, and second tasks too if backlog is enabled - get_logger().info( - f"Continue processing push trigger for {api_url=} because there are {current_active_tasks} active tasks" - ) - _duplicate_push_triggers[api_url] += 1 - else: - get_logger().info( - f"Skipping push trigger for {api_url=} because another event already triggered the same processing" - ) - return {} - async with _pending_task_duplicate_push_conditions[api_url]: - if current_active_tasks == 1: - # second task waits - get_logger().info( - f"Waiting to process push trigger for {api_url=} because the first task is still in progress" - ) - await _pending_task_duplicate_push_conditions[api_url].wait() - get_logger().info(f"Finished waiting to process push trigger for {api_url=} - continue with flow") - - try: - if get_settings().github_app.push_trigger_wait_for_initial_review and not get_git_provider()(api_url, incremental=IncrementalPR(True)).previous_review: - get_logger().info(f"Skipping incremental review because there was no initial review for {api_url=} yet") - return {} - get_logger().info(f"Performing incremental review for {api_url=} because of {event=} and {action=}") - await _perform_commands("push_commands", agent, body, api_url, log_context) - - finally: - # release the waiting task block - async with _pending_task_duplicate_push_conditions[api_url]: - _pending_task_duplicate_push_conditions[api_url].notify(1) - _duplicate_push_triggers[api_url] -= 1 - - get_logger().info("event or action does not require handling") - return {} + get_logger().debug(f'Request body:\n{body}') + await handle_push_trigger_for_new_commits(body, event, sender, action, log_context, agent) + else: + get_logger().info(f"event {event=} action {action=} does not require any handling") + return {} -def handle_line_comments(body, comment_body): - # handle line comments +def handle_line_comments(body: Dict, comment_body: [str, Any]) -> str: + if not comment_body: + return "" start_line = body["comment"]["start_line"] end_line = body["comment"]["line"] start_line = end_line if not start_line else start_line @@ -227,7 +240,7 @@ def handle_line_comments(body, comment_body): return comment_body -def _check_pull_request_event(action: str, body: dict, log_context: dict, bot_user: str) -> Tuple[Dict[str, Any], str]: +def _check_pull_request_event(action: str, body: dict, log_context: dict) -> Tuple[Dict[str, Any], str]: invalid_result = {}, "" pull_request = body.get("pull_request") if not pull_request: @@ -236,7 +249,7 @@ def _check_pull_request_event(action: str, body: dict, log_context: dict, bot_us if not api_url: return invalid_result log_context["api_url"] = api_url - if pull_request.get("draft", True) or pull_request.get("state") != "open" or pull_request.get("user", {}).get("login", "") == bot_user: + if pull_request.get("draft", True) or pull_request.get("state") != "open": return invalid_result if action in ("review_requested", "synchronize") and pull_request.get("created_at") == pull_request.get("updated_at"): # avoid double reviews when opening a PR for the first time @@ -244,35 +257,24 @@ def _check_pull_request_event(action: str, body: dict, log_context: dict, bot_us return pull_request, api_url -async def _perform_commands(commands_conf: str, agent: PRAgent, body: dict, api_url: str, log_context: dict): +async def _perform_auto_commands_github(commands_conf: str, agent: PRAgent, body: dict, api_url: str, log_context: dict): apply_repo_settings(api_url) commands = get_settings().get(f"github_app.{commands_conf}") + if not commands: + with get_logger().contextualize(**log_context): + get_logger().info(f"New PR, but no auto commands configured") + return for command in commands: split_command = command.split(" ") command = split_command[0] args = split_command[1:] other_args = update_settings_from_args(args) new_command = ' '.join([command] + other_args) - get_logger().info(body) - get_logger().info(f"Performing command: {new_command}") with get_logger().contextualize(**log_context): + get_logger().info(f"New PR opened. Performing auto command '{new_command}', for {api_url=}") await agent.handle_request(api_url, new_command) -def _is_duplicate_request(body: Dict[str, Any]) -> bool: - """ - In some deployments its possible to get duplicate requests if the handling is long, - This function checks if the request is duplicate and if so - ignores it. - """ - request_hash = hash(str(body)) - get_logger().info(f"request_hash: {request_hash}") - is_duplicate = _duplicate_requests_cache.get(request_hash, False) - _duplicate_requests_cache[request_hash] = True - if is_duplicate: - get_logger().info(f"Ignoring duplicate request {request_hash}") - return is_duplicate - - @router.get("/") async def root(): return {"status": "ok"} diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 5c5a5c05..0a43697a 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -133,12 +133,7 @@ try_fix_invalid_inline_comments = true [github_app] # these toggles allows running the github app from custom deployments -bot_user = "github-actions[bot]" override_deployment_type = true -# in some deployments it's possible to get duplicate requests if the handling is long, -# these settings are used to avoid handling duplicate requests. -duplicate_requests_cache = false -duplicate_requests_cache_ttl = 60 # in seconds # settings for "pull_request" event handle_pr_actions = ['opened', 'reopened', 'ready_for_review', 'review_requested'] pr_commands = [ @@ -155,19 +150,7 @@ push_trigger_pending_tasks_backlog = true push_trigger_pending_tasks_ttl = 300 push_commands = [ "/describe --pr_description.add_original_user_description=true --pr_description.keep_original_user_title=true", - """/auto_review -i \ - --pr_reviewer.require_focused_review=false \ - --pr_reviewer.require_score_review=false \ - --pr_reviewer.require_tests_review=false \ - --pr_reviewer.require_estimate_effort_to_review=false \ - --pr_reviewer.num_code_suggestions=0 \ - --pr_reviewer.inline_code_comments=false \ - --pr_reviewer.remove_previous_review_comment=true \ - --pr_reviewer.require_all_thresholds_for_incremental_review=false \ - --pr_reviewer.minimal_commits_for_incremental_review=5 \ - --pr_reviewer.minimal_minutes_for_incremental_review=30 \ - --pr_reviewer.extra_instructions='' \ - """ + "/review --pr_reviewer.num_code_suggestions=0", ] ignore_pr_title = []