Mirror of https://github.com/qodo-ai/pr-agent.git, synced 2025-07-03 04:10:49 +08:00
Merge pull request #696 from Codium-ai/tr/app_refactor
Refactor GitHub App
@@ -34,8 +34,10 @@ class GithubProvider(GitProvider):
         self.incremental = incremental
         if pr_url and 'pull' in pr_url:
             self.set_pr(pr_url)
-            self.pr_commits = self.pr.get_commits()
-            self.last_commit_id = list(self.pr_commits)[-1]
+            self.pr_commits = list(self.pr.get_commits())
+            if self.incremental.is_incremental:
+                self.get_incremental_commits()
+            self.last_commit_id = self.pr_commits[-1]
             self.pr_url = self.get_pr_url()  # pr_url for github actions can be as api.github.com, so we need to get the url from the pr object
         else:
             self.pr_commits = None
@@ -49,8 +51,6 @@ class GithubProvider(GitProvider):
     def set_pr(self, pr_url: str):
         self.repo, self.pr_num = self._parse_pr_url(pr_url)
         self.pr = self._get_pr()
-        if self.incremental.is_incremental:
-            self.get_incremental_commits()
 
     def get_incremental_commits(self):
         if not self.pr_commits:
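
Note on the two hunks above: the incremental-commit bookkeeping moves out of set_pr and into the constructor, so a provider built for an incremental review fetches the commit list once, filters it via get_incremental_commits(), and only then takes last_commit_id from the (filtered) list. A minimal usage sketch, not part of the diff; the constructor call mirrors the one used later in this commit, the import paths follow how github_app.py uses these helpers, and the pull-request URL is a placeholder:

    from pr_agent.git_providers import get_git_provider
    from pr_agent.git_providers.git_provider import IncrementalPR

    # Build a provider that only considers commits pushed since the last review.
    provider = get_git_provider()("https://github.com/OWNER/REPO/pull/1", incremental=IncrementalPR(True))
    commits_since_last_review = provider.pr_commits   # already filtered in __init__ when incremental
    latest = provider.last_commit_id                  # last commit of the filtered list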
@@ -88,7 +88,7 @@ class GithubProvider(GitProvider):
         self.comments = list(self.pr.get_issue_comments())
         prefixes = []
         if full:
-            prefixes.append("## PR Analysis")
+            prefixes.append("## PR Review")
         if incremental:
             prefixes.append("## Incremental PR Review")
         for index in range(len(self.comments) - 1, -1, -1):
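
The hunk above ends inside the helper that locates the bot's previous review comment: it collects the expected headline prefixes and then walks self.comments from newest to oldest. The rest of that loop is not shown in this diff; a hypothetical continuation, for illustration only (previous_review and the PyGithub .body attribute usage are assumptions):

    # Illustrative only - roughly how such a backwards scan typically concludes.
    for index in range(len(self.comments) - 1, -1, -1):
        if any(self.comments[index].body.startswith(prefix) for prefix in prefixes):
            self.previous_review = self.comments[index]
            break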
@@ -35,7 +35,6 @@ async def handle_github_webhooks(request: Request, response: Response):
 
     body = await get_body(request)
 
-    get_logger().debug(f'Request body:\n{body}')
     installation_id = body.get("installation", {}).get("id")
     context["installation_id"] = installation_id
     context["settings"] = copy.deepcopy(global_settings)
@@ -64,10 +63,133 @@ async def get_body(request):
     return body
 
 
 _duplicate_requests_cache = DefaultDictWithTimeout(ttl=get_settings().github_app.duplicate_requests_cache_ttl)
 _duplicate_push_triggers = DefaultDictWithTimeout(ttl=get_settings().github_app.push_trigger_pending_tasks_ttl)
 _pending_task_duplicate_push_conditions = DefaultDictWithTimeout(asyncio.locks.Condition, ttl=get_settings().github_app.push_trigger_pending_tasks_ttl)
 
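DefaultDictWithTimeout is a helper from the pr-agent server utilities; its implementation is not part of this page. The three module-level instances above use it as short-lived, per-key memory: seen request hashes, the number of in-flight push-trigger tasks per PR, and one asyncio Condition per PR. Assuming it behaves like a defaultdict whose entries expire ttl seconds after they were written, a minimal sketch of the idea (not the library's actual code):

    import time
    from collections import defaultdict

    class DefaultDictWithTimeout(defaultdict):
        """Illustrative sketch only: a defaultdict whose entries expire after `ttl` seconds."""

        def __init__(self, default_factory=None, ttl=60):
            super().__init__(default_factory)
            self._ttl = ttl
            self._written_at = {}

        def __getitem__(self, key):
            written = self._written_at.get(key)
            if written is not None and time.monotonic() - written > self._ttl:
                super().pop(key, None)           # entry is stale - drop it before (re)creating
                self._written_at.pop(key, None)
            return super().__getitem__(key)      # falls back to default_factory for missing keys

        def __setitem__(self, key, value):
            self._written_at[key] = time.monotonic()
            super().__setitem__(key, value)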
+async def handle_comments_on_pr(body: Dict[str, Any],
+                                event: str,
+                                sender: str,
+                                action: str,
+                                log_context: Dict[str, Any],
+                                agent: PRAgent):
+    if "comment" not in body:
+        return {}
+    comment_body = body.get("comment", {}).get("body")
+    if comment_body and isinstance(comment_body, str) and not comment_body.lstrip().startswith("/"):
+        get_logger().info("Ignoring comment not starting with /")
+        return {}
+    disable_eyes = False
+    if "issue" in body and "pull_request" in body["issue"] and "url" in body["issue"]["pull_request"]:
+        api_url = body["issue"]["pull_request"]["url"]
+    elif "comment" in body and "pull_request_url" in body["comment"]:
+        api_url = body["comment"]["pull_request_url"]
+        try:
+            if ('/ask' in comment_body and
+                    'subject_type' in body["comment"] and body["comment"]["subject_type"] == "line"):
+                # comment on a code line in the "files changed" tab
+                comment_body = handle_line_comments(body, comment_body)
+                disable_eyes = True
+        except Exception as e:
+            get_logger().error(f"Failed to handle line comments: {e}")
+    else:
+        return {}
+    log_context["api_url"] = api_url
+    comment_id = body.get("comment", {}).get("id")
+    provider = get_git_provider()(pr_url=api_url)
+    with get_logger().contextualize(**log_context):
+        get_logger().info(f"Processing comment on PR {api_url=}, comment_body={comment_body}")
+        await agent.handle_request(api_url, comment_body,
+                                   notify=lambda: provider.add_eyes_reaction(comment_id, disable_eyes=disable_eyes))
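
handle_comments_on_pr resolves the PR's REST API URL from one of two payload shapes: an issue_comment on a PR carries it under issue.pull_request.url, while a review comment on a diff line carries it under comment.pull_request_url (plus subject_type, start_line and line). Two trimmed, hypothetical payloads for illustration only; every value is a placeholder:

    # Comment on the PR conversation (issue_comment event)
    conversation_comment = {
        "comment": {"id": 1, "body": "/ask why was this changed?"},
        "issue": {"pull_request": {"url": "https://api.github.com/repos/OWNER/REPO/pulls/1"}},
    }

    # Comment on a code line in the "files changed" tab (review comment)
    line_comment = {
        "comment": {
            "id": 2,
            "body": "/ask what does this loop do?",
            "subject_type": "line",
            "pull_request_url": "https://api.github.com/repos/OWNER/REPO/pulls/1",
            "start_line": None,
            "line": 42,
        },
    }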
+
+async def handle_new_pr_opened(body: Dict[str, Any],
+                               event: str,
+                               sender: str,
+                               action: str,
+                               log_context: Dict[str, Any],
+                               agent: PRAgent):
+    title = body.get("pull_request", {}).get("title", "")
+
+    # logic to ignore PRs with specific titles (e.g. "[Auto] ...")
+    ignore_pr_title_re = get_settings().get("GITHUB_APP.IGNORE_PR_TITLE", [])
+    if not isinstance(ignore_pr_title_re, list):
+        ignore_pr_title_re = [ignore_pr_title_re]
+    if ignore_pr_title_re and any(re.search(regex, title) for regex in ignore_pr_title_re):
+        get_logger().info(f"Ignoring PR with title '{title}' due to github_app.ignore_pr_title setting")
+        return {}
+
+    pull_request, api_url = _check_pull_request_event(action, body, log_context)
+    if not (pull_request and api_url):
+        get_logger().info(f"Invalid PR event: {action=} {api_url=}")
+        return {}
+    if action in get_settings().github_app.handle_pr_actions:  # ['opened', 'reopened', 'ready_for_review', 'review_requested']
+        await _perform_auto_commands_github("pr_commands", agent, body, api_url, log_context)
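
The title filter accepts either a single regex or a list of regexes from GITHUB_APP.IGNORE_PR_TITLE and skips the PR when any of them matches the title via re.search. A small illustration; the configured patterns and the title are made up:

    import re

    ignore_pr_title_re = [r"^\[Auto\]", r"^Bump "]   # hypothetical configured patterns
    title = "[Auto] weekly dependency update"

    should_ignore = any(re.search(regex, title) for regex in ignore_pr_title_re)
    print(should_ignore)  # True - the PR would be skipped before any auto command runs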
+
+async def handle_push_trigger_for_new_commits(body: Dict[str, Any],
+                                              event: str,
+                                              sender: str,
+                                              action: str,
+                                              log_context: Dict[str, Any],
+                                              agent: PRAgent):
+    pull_request, api_url = _check_pull_request_event(action, body, log_context)
+    if not (pull_request and api_url):
+        return {}
+
+    apply_repo_settings(api_url)  # we need to apply the repo settings to get the correct settings for the PR. This is quite expensive - a call to the git provider is made for each PR event.
+    if not get_settings().github_app.handle_push_trigger:
+        return {}
+
+    # TODO: do we still want to get the list of commits to filter bot/merge commits?
+    before_sha = body.get("before")
+    after_sha = body.get("after")
+    merge_commit_sha = pull_request.get("merge_commit_sha")
+    if before_sha == after_sha:
+        return {}
+    if get_settings().github_app.push_trigger_ignore_merge_commits and after_sha == merge_commit_sha:
+        return {}
+
+    # Prevent triggering multiple times for subsequent push triggers when one is enough:
+    # The first push will trigger the processing, and if there's a second push in the meanwhile it will wait.
+    # Any more events will be discarded, because they will all trigger the exact same processing on the PR.
+    # We let the second event wait instead of discarding it because while the first event was being processed,
+    # more commits may have been pushed that led to the subsequent events,
+    # so we keep just one waiting as a delegate to trigger the processing for the new commits when done waiting.
+    current_active_tasks = _duplicate_push_triggers.setdefault(api_url, 0)
+    max_active_tasks = 2 if get_settings().github_app.push_trigger_pending_tasks_backlog else 1
+    if current_active_tasks < max_active_tasks:
+        # first task can enter, and second tasks too if backlog is enabled
+        get_logger().info(
+            f"Continue processing push trigger for {api_url=} because there are {current_active_tasks} active tasks"
+        )
+        _duplicate_push_triggers[api_url] += 1
+    else:
+        get_logger().info(
+            f"Skipping push trigger for {api_url=} because another event already triggered the same processing"
+        )
+        return {}
+    async with _pending_task_duplicate_push_conditions[api_url]:
+        if current_active_tasks == 1:
+            # second task waits
+            get_logger().info(
+                f"Waiting to process push trigger for {api_url=} because the first task is still in progress"
+            )
+            await _pending_task_duplicate_push_conditions[api_url].wait()
+            get_logger().info(f"Finished waiting to process push trigger for {api_url=} - continue with flow")
+
+    try:
+        if get_settings().github_app.push_trigger_wait_for_initial_review and not get_git_provider()(api_url,
+                                                                                                     incremental=IncrementalPR(
+                                                                                                         True)).previous_review:
+            get_logger().info(f"Skipping incremental review because there was no initial review for {api_url=} yet")
+            return {}
+        get_logger().info(f"Performing incremental review for {api_url=} because of {event=} and {action=}")
+        await _perform_auto_commands_github("push_commands", agent, body, api_url, log_context)
+
+    finally:
+        # release the waiting task block
+        async with _pending_task_duplicate_push_conditions[api_url]:
+            _pending_task_duplicate_push_conditions[api_url].notify(1)
+            _duplicate_push_triggers[api_url] -= 1
 
 
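The comment block inside handle_push_trigger_for_new_commits describes a per-PR throttle: one task runs, at most one more waits as a delegate for all later pushes, and everything else is dropped because it would repeat the same work. The same pattern, reduced to a standalone sketch; the helper and variable names here are hypothetical, while the real code keys both mappings by api_url and stores them in the TTL dictionaries declared above:

    import asyncio
    from collections import defaultdict

    _active_tasks = defaultdict(int)                 # key -> admitted tasks (running + waiting)
    _conditions = defaultdict(asyncio.Condition)     # key -> wake-up signal for the single waiter

    async def run_coalesced(key, work):
        """Run `work` now, or wait as the single delegate for a burst of identical triggers."""
        current = _active_tasks[key]
        if current >= 2:                             # one running + one already waiting: drop this one
            return
        _active_tasks[key] += 1
        async with _conditions[key]:
            if current == 1:                         # someone is already running: wait our turn
                await _conditions[key].wait()
        try:
            await work()
        finally:
            async with _conditions[key]:
                _conditions[key].notify(1)           # hand over to the waiter, if there is one
            _active_tasks[key] -= 1

In the handler above, `work` corresponds to running the configured push_commands for the PR, and the counters live in DefaultDictWithTimeout instances so stale entries eventually expire on their own.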
 async def handle_request(body: Dict[str, Any], event: str):
     """
@@ -75,144 +197,35 @@ async def handle_request(body: Dict[str, Any], event: str):
 
     Args:
         body: The request body.
-        event: The GitHub event type.
+        event: The GitHub event type (e.g. "pull_request", "issue_comment", etc.).
     """
-    action = body.get("action")
+    action = body.get("action")  # "created", "opened", "reopened", "ready_for_review", "review_requested", "synchronize"
     if not action:
         return {}
     agent = PRAgent()
-    bot_user = get_settings().github_app.bot_user
     sender = body.get("sender", {}).get("login")
     log_context = {"action": action, "event": event, "sender": sender, "server_type": "github_app"}
 
     if get_settings().github_app.duplicate_requests_cache and _is_duplicate_request(body):
         return {}
 
-    # handle all sorts of comment events (e.g. issue_comment)
+    # handle comments on PRs
     if action == 'created':
-        if "comment" not in body:
-            return {}
-        comment_body = body.get("comment", {}).get("body")
-        if comment_body and isinstance(comment_body, str) and not comment_body.lstrip().startswith("/"):
-            get_logger().info("Ignoring comment not starting with /")
-            return {}
-        if sender and bot_user in sender:
-            get_logger().info(f"Ignoring comment from {bot_user} user")
-            return {}
-        get_logger().info(f"Processing comment from {sender} user")
-        disable_eyes = False
-        if "issue" in body and "pull_request" in body["issue"] and "url" in body["issue"]["pull_request"]:
-            api_url = body["issue"]["pull_request"]["url"]
-        elif "comment" in body and "pull_request_url" in body["comment"]:
-            api_url = body["comment"]["pull_request_url"]
-            try:
-                if ('/ask' in comment_body and
-                        'subject_type' in body["comment"] and body["comment"]["subject_type"] == "line"):
-                    comment_body = handle_line_comments(body, comment_body)
-                    disable_eyes = True
-            except Exception as e:
-                get_logger().error(f"Failed to handle line comments: {e}")
-
-        else:
-            return {}
-        log_context["api_url"] = api_url
-        get_logger().info(body)
-        get_logger().info(f"Handling comment because of event={event} and action={action}")
-        comment_id = body.get("comment", {}).get("id")
-        provider = get_git_provider()(pr_url=api_url)
-        with get_logger().contextualize(**log_context):
-            await agent.handle_request(api_url, comment_body,
-                                       notify=lambda: provider.add_eyes_reaction(comment_id, disable_eyes=disable_eyes))
-
-    # handle pull_request event:
-    #   automatically review opened/reopened/ready_for_review PRs as long as they're not in draft,
-    #   as well as direct review requests from the bot
+        get_logger().debug(f'Request body:\n{body}')
+        await handle_comments_on_pr(body, event, sender, action, log_context, agent)
+    # handle new PRs
     elif event == 'pull_request' and action != 'synchronize':
-        title = body.get("pull_request", {}).get("title", "")
-        ignore_pr_title_re = get_settings().get("GITHUB_APP.IGNORE_PR_TITLE", [])
-        if not isinstance(ignore_pr_title_re, list):
-            ignore_pr_title_re = [ignore_pr_title_re]
-        if ignore_pr_title_re and any(re.search(regex, title) for regex in ignore_pr_title_re):
-            get_logger().info(f"Ignoring PR with title '{title}' due to github_app.ignore_pr_title setting")
-            return {}
-        pull_request, api_url = _check_pull_request_event(action, body, log_context, bot_user)
-        if not (pull_request and api_url):
-            return {}
-        if action in get_settings().github_app.handle_pr_actions:
-            if action == "review_requested":
-                if body.get("requested_reviewer", {}).get("login", "") != bot_user:
-                    return {}
-            get_logger().info(f"Performing review for {api_url=} because of {event=} and {action=}")
-            await _perform_commands("pr_commands", agent, body, api_url, log_context)
-
+        get_logger().debug(f'Request body:\n{body}')
+        await handle_new_pr_opened(body, event, sender, action, log_context, agent)
     # handle pull_request event with synchronize action - "push trigger" for new commits
     elif event == 'pull_request' and action == 'synchronize':
-        pull_request, api_url = _check_pull_request_event(action, body, log_context, bot_user)
-        if not (pull_request and api_url):
-            return {}
-
-        apply_repo_settings(api_url)
-        if not get_settings().github_app.handle_push_trigger:
-            return {}
-
-        # TODO: do we still want to get the list of commits to filter bot/merge commits?
-        before_sha = body.get("before")
-        after_sha = body.get("after")
-        merge_commit_sha = pull_request.get("merge_commit_sha")
-        if before_sha == after_sha:
-            return {}
-        if get_settings().github_app.push_trigger_ignore_merge_commits and after_sha == merge_commit_sha:
-            return {}
-        if get_settings().github_app.push_trigger_ignore_bot_commits and body.get("sender", {}).get("login", "") == bot_user:
-            return {}
-
-        # Prevent triggering multiple times for subsequent push triggers when one is enough:
-        # The first push will trigger the processing, and if there's a second push in the meanwhile it will wait.
-        # Any more events will be discarded, because they will all trigger the exact same processing on the PR.
-        # We let the second event wait instead of discarding it because while the first event was being processed,
-        # more commits may have been pushed that led to the subsequent events,
-        # so we keep just one waiting as a delegate to trigger the processing for the new commits when done waiting.
-        current_active_tasks = _duplicate_push_triggers.setdefault(api_url, 0)
-        max_active_tasks = 2 if get_settings().github_app.push_trigger_pending_tasks_backlog else 1
-        if current_active_tasks < max_active_tasks:
-            # first task can enter, and second tasks too if backlog is enabled
-            get_logger().info(
-                f"Continue processing push trigger for {api_url=} because there are {current_active_tasks} active tasks"
-            )
-            _duplicate_push_triggers[api_url] += 1
-        else:
-            get_logger().info(
-                f"Skipping push trigger for {api_url=} because another event already triggered the same processing"
-            )
-            return {}
-        async with _pending_task_duplicate_push_conditions[api_url]:
-            if current_active_tasks == 1:
-                # second task waits
-                get_logger().info(
-                    f"Waiting to process push trigger for {api_url=} because the first task is still in progress"
-                )
-                await _pending_task_duplicate_push_conditions[api_url].wait()
-                get_logger().info(f"Finished waiting to process push trigger for {api_url=} - continue with flow")
-
-        try:
-            if get_settings().github_app.push_trigger_wait_for_initial_review and not get_git_provider()(api_url, incremental=IncrementalPR(True)).previous_review:
-                get_logger().info(f"Skipping incremental review because there was no initial review for {api_url=} yet")
-                return {}
-            get_logger().info(f"Performing incremental review for {api_url=} because of {event=} and {action=}")
-            await _perform_commands("push_commands", agent, body, api_url, log_context)
-
-        finally:
-            # release the waiting task block
-            async with _pending_task_duplicate_push_conditions[api_url]:
-                _pending_task_duplicate_push_conditions[api_url].notify(1)
-                _duplicate_push_triggers[api_url] -= 1
-
-    get_logger().info("event or action does not require handling")
-    return {}
+        get_logger().debug(f'Request body:\n{body}')
+        await handle_push_trigger_for_new_commits(body, event, sender, action, log_context, agent)
+    else:
+        get_logger().info(f"event {event=} action {action=} does not require any handling")
+    return {}
 
 
-def handle_line_comments(body, comment_body):
-    # handle line comments
+def handle_line_comments(body: Dict, comment_body: [str, Any]) -> str:
+    if not comment_body:
+        return ""
     start_line = body["comment"]["start_line"]
     end_line = body["comment"]["line"]
     start_line = end_line if not start_line else start_line
@@ -227,7 +240,7 @@ def handle_line_comments(body, comment_body):
     return comment_body
 
 
-def _check_pull_request_event(action: str, body: dict, log_context: dict, bot_user: str) -> Tuple[Dict[str, Any], str]:
+def _check_pull_request_event(action: str, body: dict, log_context: dict) -> Tuple[Dict[str, Any], str]:
     invalid_result = {}, ""
     pull_request = body.get("pull_request")
     if not pull_request:
@@ -236,7 +249,7 @@ def _check_pull_request_event(action: str, body: dict, log_context: dict, bot_user: str) -> Tuple[Dict[str, Any], str]:
     if not api_url:
         return invalid_result
     log_context["api_url"] = api_url
-    if pull_request.get("draft", True) or pull_request.get("state") != "open" or pull_request.get("user", {}).get("login", "") == bot_user:
+    if pull_request.get("draft", True) or pull_request.get("state") != "open":
         return invalid_result
     if action in ("review_requested", "synchronize") and pull_request.get("created_at") == pull_request.get("updated_at"):
         # avoid double reviews when opening a PR for the first time
@@ -244,35 +257,24 @@ def _check_pull_request_event(action: str, body: dict, log_context: dict, bot_user: str) -> Tuple[Dict[str, Any], str]:
     return pull_request, api_url
 
 
-async def _perform_commands(commands_conf: str, agent: PRAgent, body: dict, api_url: str, log_context: dict):
+async def _perform_auto_commands_github(commands_conf: str, agent: PRAgent, body: dict, api_url: str, log_context: dict):
     apply_repo_settings(api_url)
     commands = get_settings().get(f"github_app.{commands_conf}")
+    if not commands:
+        with get_logger().contextualize(**log_context):
+            get_logger().info(f"New PR, but no auto commands configured")
+        return
     for command in commands:
         split_command = command.split(" ")
         command = split_command[0]
         args = split_command[1:]
         other_args = update_settings_from_args(args)
         new_command = ' '.join([command] + other_args)
-        get_logger().info(body)
-        get_logger().info(f"Performing command: {new_command}")
         with get_logger().contextualize(**log_context):
+            get_logger().info(f"New PR opened. Performing auto command '{new_command}', for {api_url=}")
             await agent.handle_request(api_url, new_command)
 
 
 def _is_duplicate_request(body: Dict[str, Any]) -> bool:
     """
     In some deployments its possible to get duplicate requests if the handling is long,
     This function checks if the request is duplicate and if so - ignores it.
     """
     request_hash = hash(str(body))
     get_logger().info(f"request_hash: {request_hash}")
     is_duplicate = _duplicate_requests_cache.get(request_hash, False)
     _duplicate_requests_cache[request_hash] = True
     if is_duplicate:
         get_logger().info(f"Ignoring duplicate request {request_hash}")
     return is_duplicate
 
 
 @router.get("/")
 async def root():
     return {"status": "ok"}
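
_perform_auto_commands_github treats each configured entry as a slash command followed by --key=value overrides: the first token selects the command, update_settings_from_args applies the overrides and hands back whatever is left over (hence other_args above), and the re-joined string is dispatched to the agent. A minimal illustration of just the string handling, with the settings side omitted; the command string is one of the defaults from the configuration below:

    command = "/review --pr_reviewer.num_code_suggestions=0"

    split_command = command.split(" ")
    name = split_command[0]      # "/review"
    args = split_command[1:]     # ["--pr_reviewer.num_code_suggestions=0"]

    # update_settings_from_args(args) would apply these overrides to the settings
    # and return the remaining args; re-joined, the dispatched request looks like:
    new_command = " ".join([name] + args)
    print(new_command)           # /review --pr_reviewer.num_code_suggestions=0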
@@ -133,12 +133,7 @@ try_fix_invalid_inline_comments = true
 
 [github_app]
-# these toggles allows running the github app from custom deployments
-bot_user = "github-actions[bot]"
-override_deployment_type = true
-# in some deployments it's possible to get duplicate requests if the handling is long,
-# these settings are used to avoid handling duplicate requests.
 duplicate_requests_cache = false
 duplicate_requests_cache_ttl = 60 # in seconds
 # settings for "pull_request" event
 handle_pr_actions = ['opened', 'reopened', 'ready_for_review', 'review_requested']
 pr_commands = [
@@ -155,19 +150,7 @@ push_trigger_pending_tasks_backlog = true
 push_trigger_pending_tasks_ttl = 300
 push_commands = [
     "/describe --pr_description.add_original_user_description=true --pr_description.keep_original_user_title=true",
-    """/auto_review -i \
-    --pr_reviewer.require_focused_review=false \
-    --pr_reviewer.require_score_review=false \
-    --pr_reviewer.require_tests_review=false \
-    --pr_reviewer.require_estimate_effort_to_review=false \
-    --pr_reviewer.num_code_suggestions=0 \
-    --pr_reviewer.inline_code_comments=false \
-    --pr_reviewer.remove_previous_review_comment=true \
-    --pr_reviewer.require_all_thresholds_for_incremental_review=false \
-    --pr_reviewer.minimal_commits_for_incremental_review=5 \
-    --pr_reviewer.minimal_minutes_for_incremental_review=30 \
-    --pr_reviewer.extra_instructions='' \
-    """
+    "/review --pr_reviewer.num_code_suggestions=0",
 ]
 ignore_pr_title = []