refactor: consolidate PR ignore logic into a single function and update documentation

This commit is contained in:
mrT23
2024-09-07 10:34:57 +03:00
parent 9199d84796
commit 147a8e0ef3
6 changed files with 182 additions and 119 deletions

View File

@ -115,33 +115,25 @@ LANGSMITH_BASE_URL=<url>
## Ignoring automatic commands in PRs
In some cases, you may want to ignore automatic commands in PRs. For example you may want to ignore MR with a specific title, or labels or from/to specific branches.
In some cases, you may want to automatically ignore specific PRs. PR-Agent enables you to ignore PRs with a specific title, or from/to specific branches (regex matching).
For example, to ignore MRs with a specific title such as "[AUTO]: foobar", you can add the following to your `configuration.toml` file:
To ignore PRs with a specific title such as "[Bump]: ...", you can add the following to your `configuration.toml` file:
```
[config]
ignore_mr_title = ["\\[AUTO\\]"]
ignore_pr_title = ["\\[Bump\\]"]
```
Where the `ignore_mr_title` is a list of regex patterns to match the MR title you want to ignore.
Where the `ignore_pr_title` is a list of regex patterns to match the PR title you want to ignore. Default is `ignore_pr_title = ["^\\[Auto\\]", "^Auto"]`.
To ignore MRs with specific labels, you can add the following to your `configuration.toml` file:
To ignore PRs from specific source or target branches, you can add the following to your `configuration.toml` file:
```
[config]
ignore_mr_labels = ["auto"]
ignore_pr_source_branches = ['develop', 'main', 'master', 'stage']
ignore_pr_target_branches = ["qa"]
```
Where the `ignore_mr_labels` is a list of labels you want to ignore.
To ignore MRs from specific branches, you can add the following to your `configuration.toml` file:
```
[config]
ignore_mr_source_branches = ['develop', 'main', 'master', 'stage']
ignore_mr_target_branches = ["qa"]
```
Where the `ignore_mr_source_branches` and `ignore_mr_target_branches` are lists of regex patterns to match the source and target branches you want to ignore.
Where the `ignore_pr_source_branches` and `ignore_pr_target_branches` are lists of regex patterns to match the source and target branches you want to ignore.
They are not mutually exclusive; you can use them together or separately.

View File

@ -94,13 +94,6 @@ To cancel the automatic run of all the tools, set:
pr_commands = []
```
You can also disable automatic runs for PRs with specific titles, by setting the `ignore_pr_title` parameter with the relevant regex. For example:
```
[github_app]
ignore_pr_title = ["^Auto", ".*ignore.*"]
```
will ignore PRs with titles that start with "Auto" or contain the word "ignore".
### GitHub app automatic tools for push actions (commits to an open PR)
In addition to running automatic tools when a PR is opened, the GitHub app can also respond to new code that is pushed to an open PR.

View File

@ -3,6 +3,7 @@ import copy
import hashlib
import json
import os
import re
import time
import jwt
@ -91,6 +92,48 @@ async def _perform_commands_bitbucket(commands_conf: str, agent: PRAgent, api_ur
get_logger().error(f"Failed to perform command {command}: {e}")
def is_bot_user(data) -> bool:
    """Report whether a Bitbucket webhook event came from a non-user actor.

    Args:
        data: Parsed webhook payload. The actor type is read from
            ``data["data"]["actor"]["type"]``.

    Returns:
        True when the actor type is anything other than ``"user"``.
        False when it is ``"user"`` or when the payload cannot be inspected.
    """
    try:
        actor_type = data["data"]["actor"]["type"]
        if actor_type != "user":
            get_logger().info(f"BitBucket actor type is not 'user': {actor_type}")
            return True
    except Exception as e:
        # Best-effort check: a malformed payload is logged and the sender is
        # treated as a regular user. The message must be an f-string so the
        # actual exception text is logged rather than a literal "{e}".
        get_logger().error(f"Failed 'is_bot_user' logic: {e}")
    return False
def _as_pattern_list(value) -> list:
    """Return an ignore-setting value as a list (empty for unset, wrapped if scalar)."""
    if not value:
        return []
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


def should_process_pr_logic(data) -> bool:
    """Decide whether a Bitbucket pull request should be processed.

    The PR title, source branch and destination branch are read from
    ``data["data"]["pullrequest"]`` and matched (``re.search``) against the
    ``CONFIG.IGNORE_PR_TITLE``, ``CONFIG.IGNORE_PR_SOURCE_BRANCHES`` and
    ``CONFIG.IGNORE_PR_TARGET_BRANCHES`` settings.

    Args:
        data: Parsed webhook payload.

    Returns:
        False when the title or one of the branches matches an ignore
        pattern. True otherwise, including when the check itself fails.
    """
    try:
        pr_data = data.get("data", {}).get("pullrequest", {})
        title = pr_data.get("title", "")
        source_branch = pr_data.get("source", {}).get("branch", {}).get("name", "")
        target_branch = pr_data.get("destination", {}).get("branch", {}).get("name", "")

        # logic to ignore PRs with specific titles
        if title:
            title_patterns = _as_pattern_list(get_settings().get("CONFIG.IGNORE_PR_TITLE", []))
            if any(re.search(regex, title) for regex in title_patterns):
                get_logger().info(f"Ignoring PR with title '{title}' due to config.ignore_pr_title setting")
                return False

        # logic to ignore PRs from/to specific branches.
        # Settings are normalised so that a single string is treated as one
        # pattern instead of being iterated character by character.
        source_patterns = _as_pattern_list(get_settings().get("CONFIG.IGNORE_PR_SOURCE_BRANCHES", []))
        target_patterns = _as_pattern_list(get_settings().get("CONFIG.IGNORE_PR_TARGET_BRANCHES", []))
        if any(re.search(regex, source_branch) for regex in source_patterns):
            get_logger().info(
                f"Ignoring PR with source branch '{source_branch}' due to config.ignore_pr_source_branches settings")
            return False
        if any(re.search(regex, target_branch) for regex in target_patterns):
            get_logger().info(
                f"Ignoring PR with target branch '{target_branch}' due to config.ignore_pr_target_branches settings")
            return False
    except Exception as e:
        # Best-effort filter: on any failure, log and fall through to processing the PR.
        get_logger().error(f"Failed 'should_process_pr_logic': {e}")
    return True
@router.post("/webhook")
async def handle_github_webhooks(background_tasks: BackgroundTasks, request: Request):
app_name = get_settings().get("CONFIG.APP_NAME", "Unknown")
@ -101,13 +144,17 @@ async def handle_github_webhooks(background_tasks: BackgroundTasks, request: Req
input_jwt = jwt_header.split(" ")[1]
data = await request.json()
get_logger().debug(data)
async def inner():
try:
try:
if data["data"]["actor"]["type"] != "user":
# ignore bot users
if is_bot_user(data):
return "OK"
# Check if the PR should be processed
if data.get("event", "") == "pullrequest:created":
if not should_process_pr_logic(data):
return "OK"
except KeyError:
get_logger().error("Failed to get actor type, check previous logs, this shouldn't happen.")
# Get the username of the sender
try:
@ -146,16 +193,6 @@ async def handle_github_webhooks(background_tasks: BackgroundTasks, request: Req
sender_id, pr_url) is not Eligibility.NOT_ELIGIBLE:
if get_settings().get("bitbucket_app.pr_commands"):
await _perform_commands_bitbucket("pr_commands", PRAgent(), pr_url, log_context)
else: # backwards compatibility
auto_review = get_setting_or_env("BITBUCKET_APP.AUTO_REVIEW", None)
if is_true(auto_review): # by default, auto review is disabled
await PRReviewer(pr_url).run()
auto_improve = get_setting_or_env("BITBUCKET_APP.AUTO_IMPROVE", None)
if is_true(auto_improve): # by default, auto improve is disabled
await PRCodeSuggestions(pr_url).run()
auto_describe = get_setting_or_env("BITBUCKET_APP.AUTO_DESCRIBE", None)
if is_true(auto_describe): # by default, auto describe is disabled
await PRDescription(pr_url).run()
elif event == "pullrequest:comment_created":
pr_url = data["data"]["pullrequest"]["links"]["html"]["href"]
log_context["api_url"] = pr_url

View File

@ -130,7 +130,6 @@ async def handle_new_pr_opened(body: Dict[str, Any],
title = body.get("pull_request", {}).get("title", "")
get_settings().config.is_auto_command = True
pull_request, api_url = _check_pull_request_event(action, body, log_context)
if not (pull_request and api_url):
get_logger().info(f"Invalid PR event: {action=} {api_url=}")
@ -138,36 +137,6 @@ async def handle_new_pr_opened(body: Dict[str, Any],
if action in get_settings().github_app.handle_pr_actions: # ['opened', 'reopened', 'ready_for_review']
# logic to ignore PRs with specific titles (e.g. "[Auto] ...")
apply_repo_settings(api_url)
ignore_pr_title_re = get_settings().get("CONFIG.IGNORE_PR_TITLE", [])
if not isinstance(ignore_pr_title_re, list):
ignore_pr_title_re = [ignore_pr_title_re]
if ignore_pr_title_re and any(re.search(regex, title) for regex in ignore_pr_title_re):
get_logger().info(f"Ignoring PR with title '{title}' due to github_app.ignore_pr_title setting")
return {}
# logic to ignore PRs with specific labels or source branches or target branches.
ignore_pr_labels = get_settings().get("CONFIG.IGNORE_PR_LABELS", [])
ignore_pr_source_branches = get_settings().get("CONFIG.IGNORE_PR_SOURCE_BRANCHES", [])
ignore_pr_target_branches = get_settings().get("CONFIG.IGNORE_PR_TARGET_BRANCHES", [])
if ignore_pr_labels:
labels = [label['name'] for label in pull_request.get("labels", [])]
if any(label in ignore_pr_labels for label in labels):
labels_str = ", ".join(labels)
get_logger().info(f"Ignoring PR with labels '{labels_str}' due to github_app.ignore_pr_labels settings")
return {}
if ignore_pr_source_branches or ignore_pr_target_branches:
source_branch = pull_request.get("head", {}).get("ref", "")
target_branch = pull_request.get("base", {}).get("ref", "")
if any(re.search(regex, source_branch) for regex in ignore_pr_source_branches):
get_logger().info(f"Ignoring PR with source branch '{source_branch}' due to github_app.ignore_pr_source_branches settings")
return {}
if any(re.search(regex, target_branch) for regex in ignore_pr_target_branches):
get_logger().info(f"Ignoring PR with target branch '{target_branch}' due to github_app.ignore_pr_target_branches settings")
return {}
if get_identity_provider().verify_eligibility("github", sender_id, api_url) is not Eligibility.NOT_ELIGIBLE:
await _perform_auto_commands_github("pr_commands", agent, body, api_url, log_context)
else:
@ -269,6 +238,60 @@ def get_log_context(body, event, action, build_number):
return log_context, sender, sender_id, sender_type
def is_bot_user(sender, sender_type):
    """Tell whether a GitHub event should be skipped because a bot sent it.

    The sender is reported as a bot only when the ``GITHUB_APP.IGNORE_BOT_PR``
    setting is truthy, ``sender_type`` equals ``"Bot"`` and the sender name
    does not contain ``'pr-agent'``.

    Args:
        sender: Name of the event sender.
        sender_type: Sender type string from the event.

    Returns:
        True when the sender should be ignored as a bot. False otherwise,
        including when the check itself raises.
    """
    try:
        # logic to ignore PRs opened by bot
        ignore_bots = get_settings().get("GITHUB_APP.IGNORE_BOT_PR", False)
        if ignore_bots and sender_type == "Bot" and 'pr-agent' not in sender:
            get_logger().info(f"Ignoring PR from '{sender=}' because it is a bot")
            return True
    except Exception as e:
        get_logger().error(f"Failed 'is_bot_user' logic: {e}")
    return False
def _as_pattern_list(value) -> list:
    """Return an ignore-setting value as a list (empty for unset, wrapped if scalar)."""
    if not value:
        return []
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


def should_process_pr_logic(sender_type, sender, body) -> bool:
    """Decide whether a GitHub pull request event should be processed.

    The title, labels, head ref and base ref are read from
    ``body["pull_request"]`` and compared with the ``CONFIG.IGNORE_PR_TITLE``,
    ``CONFIG.IGNORE_PR_LABELS``, ``CONFIG.IGNORE_PR_SOURCE_BRANCHES`` and
    ``CONFIG.IGNORE_PR_TARGET_BRANCHES`` settings. Titles and branches are
    matched with ``re.search``; labels are matched by exact name.

    Args:
        sender_type: Sender type of the event (not used by the current checks).
        sender: Sender name of the event (not used by the current checks).
        body: Parsed webhook payload.

    Returns:
        False when the title, a label or one of the branches matches an
        ignore rule. True otherwise, including when the check itself fails.
    """
    try:
        pull_request = body.get("pull_request", {})
        title = pull_request.get("title", "")
        pr_labels = pull_request.get("labels", [])
        source_branch = pull_request.get("head", {}).get("ref", "")
        target_branch = pull_request.get("base", {}).get("ref", "")

        # logic to ignore PRs with specific titles
        if title:
            title_patterns = _as_pattern_list(get_settings().get("CONFIG.IGNORE_PR_TITLE", []))
            if any(re.search(regex, title) for regex in title_patterns):
                get_logger().info(f"Ignoring PR with title '{title}' due to config.ignore_pr_title setting")
                return False

        # logic to ignore PRs with specific labels or source branches or target branches.
        # Settings are normalised so that a single string is one entry: otherwise
        # `label in "auto"` is a substring test and branch patterns are iterated
        # character by character.
        ignored_labels = _as_pattern_list(get_settings().get("CONFIG.IGNORE_PR_LABELS", []))
        if pr_labels and ignored_labels:
            labels = [label['name'] for label in pr_labels]
            if any(label in ignored_labels for label in labels):
                labels_str = ", ".join(labels)
                get_logger().info(f"Ignoring PR with labels '{labels_str}' due to config.ignore_pr_labels settings")
                return False

        source_patterns = _as_pattern_list(get_settings().get("CONFIG.IGNORE_PR_SOURCE_BRANCHES", []))
        target_patterns = _as_pattern_list(get_settings().get("CONFIG.IGNORE_PR_TARGET_BRANCHES", []))
        # Branch rules apply only when the payload actually carries a pull request.
        if pull_request:
            if any(re.search(regex, source_branch) for regex in source_patterns):
                get_logger().info(
                    f"Ignoring PR with source branch '{source_branch}' due to config.ignore_pr_source_branches settings")
                return False
            if any(re.search(regex, target_branch) for regex in target_patterns):
                get_logger().info(
                    f"Ignoring PR with target branch '{target_branch}' due to config.ignore_pr_target_branches settings")
                return False
    except Exception as e:
        # Best-effort filter: on any failure, log and fall through to processing the PR.
        get_logger().error(f"Failed 'should_process_pr_logic': {e}")
    return True
async def handle_request(body: Dict[str, Any], event: str):
"""
Handle incoming GitHub webhook requests.
@ -277,19 +300,20 @@ async def handle_request(body: Dict[str, Any], event: str):
body: The request body.
event: The GitHub event type (e.g. "pull_request", "issue_comment", etc.).
"""
action = body.get("action") # "created", "opened", "reopened", "ready_for_review", "review_requested", "synchronize"
action = body.get("action") # "created", "opened", "reopened", "ready_for_review", "review_requested", "synchronize"
if not action:
return {}
agent = PRAgent()
log_context, sender, sender_id, sender_type = get_log_context(body, event, action, build_number)
# logic to ignore PRs opened by bot
if get_settings().get("GITHUB_APP.IGNORE_BOT_PR", False) and sender_type == "Bot":
if 'pr-agent' not in sender:
get_logger().info(f"Ignoring PR from '{sender=}' because it is a bot")
# logic to ignore PRs opened by bot, PRs with specific titles, labels, source branches, or target branches
if is_bot_user(sender, sender_type):
return {}
if action != 'created' and 'check_run' not in body:
if not should_process_pr_logic(sender_type, sender, body):
return {}
if 'check_run' in body: # handle failed checks
if 'check_run' in body: # handle failed checks
# get_logger().debug(f'Request body', artifact=body, event=event) # added inside handle_checks
pass
# handle comments on PRs
@ -304,7 +328,6 @@ async def handle_request(body: Dict[str, Any], event: str):
pass # handle_checkbox_clicked
# handle pull_request event with synchronize action - "push trigger" for new commits
elif event == 'pull_request' and action == 'synchronize':
# get_logger().debug(f'Request body', artifact=body, event=event) # added inside handle_push_trigger_for_new_commits
await handle_push_trigger_for_new_commits(body, event, sender,sender_id, action, log_context, agent)
elif event == 'pull_request' and action == 'closed':
if get_settings().get("CONFIG.ANALYTICS_FOLDER", ""):

View File

@ -75,6 +75,56 @@ async def _perform_commands_gitlab(commands_conf: str, agent: PRAgent, api_url:
except Exception as e:
get_logger().error(f"Failed to perform command {command}: {e}")
def is_bot_user(data) -> bool:
    """Guess from the sender's display name whether a GitLab event came from a bot.

    The lower-cased ``data["user"]["name"]`` (``"unknown"`` when absent) is
    scanned for a fixed set of substrings.

    Args:
        data: Parsed webhook payload.

    Returns:
        True when the name contains one of the bot markers. False otherwise,
        including when the check itself raises.
    """
    try:
        # logic to ignore bot users (unlike Github, no direct flag for bot users in gitlab)
        user_info = data.get("user", {})
        sender_name = user_info.get("name", "unknown").lower()
        for marker in ('codium', 'bot_', 'bot-', '_bot', '-bot'):
            if marker in sender_name:
                get_logger().info(f"Skipping GitLab bot user: {sender_name}")
                return True
    except Exception as e:
        get_logger().error(f"Failed 'is_bot_user' logic: {e}")
    return False
def _as_pattern_list(value) -> list:
    """Return an ignore-setting value as a list (empty for unset, wrapped if scalar)."""
    if not value:
        return []
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


def should_process_pr_logic(data, title) -> bool:
    """Decide whether a GitLab merge request event should be processed.

    The source branch, target branch and labels are read from
    ``data['object_attributes']`` and, together with ``title``, compared with
    the ``CONFIG.IGNORE_PR_SOURCE_BRANCHES``, ``CONFIG.IGNORE_PR_TARGET_BRANCHES``,
    ``CONFIG.IGNORE_PR_LABELS`` and ``CONFIG.IGNORE_PR_TITLE`` settings.
    Branches and title are matched with ``re.search``; labels are matched by
    exact title.

    Args:
        data: Parsed webhook payload.
        title: Merge request title; may be None or empty, in which case the
            title rule is skipped.

    Returns:
        False when a branch, a label or the title matches an ignore rule.
        True otherwise, including when the check itself fails.
    """
    try:
        # logic to ignore MRs for titles, labels and source, target branches.
        # Settings are normalised so that a single string is one entry instead
        # of being iterated character by character.
        title_patterns = _as_pattern_list(get_settings().get("CONFIG.IGNORE_PR_TITLE", []))
        ignored_labels = _as_pattern_list(get_settings().get("CONFIG.IGNORE_PR_LABELS", []))
        source_patterns = _as_pattern_list(get_settings().get("CONFIG.IGNORE_PR_SOURCE_BRANCHES", []))
        target_patterns = _as_pattern_list(get_settings().get("CONFIG.IGNORE_PR_TARGET_BRANCHES", []))

        if source_patterns:
            source_branch = data['object_attributes'].get('source_branch')
            # A missing branch is skipped: re.search(regex, None) would raise
            # and abort the remaining checks.
            if source_branch and any(re.search(regex, source_branch) for regex in source_patterns):
                get_logger().info(
                    f"Ignoring MR with source branch '{source_branch}' due to config.ignore_pr_source_branches settings")
                return False

        if target_patterns:
            target_branch = data['object_attributes'].get('target_branch')
            if target_branch and any(re.search(regex, target_branch) for regex in target_patterns):
                get_logger().info(
                    f"Ignoring MR with target branch '{target_branch}' due to config.ignore_pr_target_branches settings")
                return False

        if ignored_labels:
            labels = [label['title'] for label in data['object_attributes'].get('labels', []) or []]
            if any(label in ignored_labels for label in labels):
                labels_str = ", ".join(labels)
                get_logger().info(f"Ignoring MR with labels '{labels_str}' due to config.ignore_pr_labels settings")
                return False

        # Events without a title are not subject to the title rule.
        if title and any(re.search(regex, title) for regex in title_patterns):
            get_logger().info(f"Ignoring MR with title '{title}' due to config.ignore_pr_title settings")
            return False
    except Exception as e:
        # Best-effort filter: on any failure, log and fall through to processing the MR.
        get_logger().error(f"Failed 'should_process_pr_logic': {e}")
    return True
@router.post("/webhook")
@ -118,11 +168,13 @@ async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request):
sender = data.get("user", {}).get("username", "unknown")
sender_id = data.get("user", {}).get("id", "unknown")
# logic to ignore bot users (unlike Github, no direct flag for bot users in gitlab)
sender_name = data.get("user", {}).get("name", "unknown").lower()
if 'codium' in sender_name or 'bot_' in sender_name or 'bot-' in sender_name or '_bot' in sender_name or '-bot' in sender_name:
get_logger().info(f"Skipping bot user: {sender_name}")
# ignore bot users
if is_bot_user(data):
return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"}))
if data.get('event_type') != 'note': # not a comment
# ignore MRs based on title, labels, source and target branches
if not should_process_pr_logic(data, data['object_attributes'].get('title')):
return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"}))
log_context["sender"] = sender
if data.get('object_kind') == 'merge_request' and data['object_attributes'].get('action') in ['open', 'reopen']:
@ -130,40 +182,10 @@ async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request):
url = data['object_attributes'].get('url')
draft = data['object_attributes'].get('draft')
get_logger().info(f"New merge request: {url}")
if draft:
get_logger().info(f"Skipping draft MR: {url}")
return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"}))
# logic to ignore MRs for titles, labels and source, target branches.
ignore_mr_title = get_settings().get("CONFIG.IGNORE_PR_TITLE", [])
ignore_mr_labels = get_settings().get("CONFIG.IGNORE_PR_LABELS", [])
ignore_mr_source_branches = get_settings().get("CONFIG.IGNORE_PR_SOURCE_BRANCHES", [])
ignore_mr_target_branches = get_settings().get("CONFIG.IGNORE_PR_TARGET_BRANCHES", [])
if ignore_mr_source_branches:
source_branch = data['object_attributes'].get('source_branch')
if any(re.search(regex, source_branch) for regex in ignore_mr_source_branches):
get_logger().info(f"Ignoring MR with source branch '{source_branch}' due to gitlab.ignore_mr_source_branches settings")
return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"}))
if ignore_mr_target_branches:
target_branch = data['object_attributes'].get('target_branch')
if any(re.search(regex, target_branch) for regex in ignore_mr_target_branches):
get_logger().info(f"Ignoring MR with target branch '{target_branch}' due to gitlab.ignore_mr_target_branches settings")
return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"}))
if ignore_mr_labels:
labels = [label['title'] for label in data['object_attributes'].get('labels', [])]
if any(label in ignore_mr_labels for label in labels):
labels_str = ", ".join(labels)
get_logger().info(f"Ignoring MR with labels '{labels_str}' due to gitlab.ignore_mr_labels settings")
return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"}))
if ignore_mr_title:
if any(re.search(regex, title) for regex in ignore_mr_title):
get_logger().info(f"Ignoring MR with title '{title}' due to gitlab.ignore_mr_title settings")
return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"}))
await _perform_commands_gitlab("pr_commands", PRAgent(), url, log_context)
elif data.get('object_kind') == 'note' and data.get('event_type') == 'note': # comment on MR
if 'merge_request' in data:

View File

@ -35,15 +35,11 @@ is_auto_command=false
# seed
seed=-1 # set positive value to fix the seed (and ensure temperature=0)
temperature=0.2
# a list of regular expressions to match against the PR title to ignore the PR agent
ignore_pr_title = []
# a list of regular expressions of target branches to ignore from PR agent when an MR is created
ignore_pr_target_branches = []
# a list of regular expressions of source branches to ignore from PR agent when an MR is created
ignore_pr_source_branches = []
# labels to ignore from PR agent when an MR is created
ignore_pr_labels = []
# ignore logic
ignore_pr_title = ["^\\[Auto\\]", "^Auto"] # a list of regular expressions to match against the PR title to ignore the PR agent
ignore_pr_target_branches = [] # a list of regular expressions of target branches to ignore from PR agent when a PR is created
ignore_pr_source_branches = [] # a list of regular expressions of source branches to ignore from PR agent when a PR is created
ignore_pr_labels = [] # labels to ignore from PR agent when a PR is created
[pr_reviewer] # /review #
# enable/disable features