Files
pr-agent/pr_agent/tools/pr_reviewer.py

368 lines
16 KiB
Python
Raw Normal View History

2023-07-06 00:21:08 +03:00
import copy
import datetime
2023-07-19 01:03:47 +03:00
from collections import OrderedDict
from typing import List, Tuple
2023-07-06 00:21:08 +03:00
2023-08-09 08:50:15 +03:00
import yaml
2023-07-06 00:21:08 +03:00
from jinja2 import Environment, StrictUndefined
2023-08-09 08:50:15 +03:00
from yaml import SafeLoader
2023-07-06 00:21:08 +03:00
from pr_agent.algo.ai_handler import AiHandler
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
2023-07-06 00:21:08 +03:00
from pr_agent.algo.token_handler import TokenHandler
2023-10-24 22:28:57 +03:00
from pr_agent.algo.utils import convert_to_markdown, load_yaml, try_fix_yaml, set_custom_labels
from pr_agent.config_loader import get_settings
2023-07-18 10:17:09 +03:00
from pr_agent.git_providers import get_git_provider
from pr_agent.git_providers.git_provider import IncrementalPR, get_main_pr_language
from pr_agent.log import get_logger
2023-07-18 11:34:57 +03:00
from pr_agent.servers.help import actions_help_text, bot_help_text
2023-07-06 00:21:08 +03:00
class PRReviewer:
    """
    The PRReviewer class is responsible for reviewing a pull request and generating feedback using an AI model.
    """

    def __init__(self, pr_url: str, is_answer: bool = False, is_auto: bool = False, args: list = None):
        """
        Initialize the PRReviewer object with the necessary attributes and objects to review a pull request.

        Args:
            pr_url (str): The URL of the pull request to be reviewed.
            is_answer (bool, optional): Indicates whether the review is being done in answer mode. Defaults to False.
            is_auto (bool, optional): Indicates whether the review was triggered automatically; `run` skips the
                review in that case when `pr_reviewer.automatic_review` is disabled. Defaults to False.
            args (list, optional): List of arguments passed to the PRReviewer class. Defaults to None.

        Raises:
            Exception: If answer mode is requested but the git provider does not support "get_issue_comments".
        """
        # Must run first: it sets self.incremental, which the git provider constructor needs.
        self.parse_args(args)  # -i command

        self.git_provider = get_git_provider()(pr_url, incremental=self.incremental)
        self.main_language = get_main_pr_language(
            self.git_provider.get_languages(), self.git_provider.get_files()
        )
        self.pr_url = pr_url
        self.is_answer = is_answer
        self.is_auto = is_auto

        if self.is_answer and not self.git_provider.is_supported("get_issue_comments"):
            raise Exception(f"Answer mode is not supported for {get_settings().config.git_provider} for now")

        self.ai_handler = AiHandler()
        self.patches_diff = None
        self.prediction = None

        # _get_user_answers() returns (question, answer) in that order; unpack accordingly so the
        # prompt variables below are not swapped.
        question_str, answer_str = self._get_user_answers()
        self.vars = {
            "title": self.git_provider.pr.title,
            "branch": self.git_provider.get_pr_branch(),
            "description": self.git_provider.get_pr_description(),
            "language": self.main_language,
            "diff": "",  # empty diff for initial calculation
            "require_score": get_settings().pr_reviewer.require_score_review,
            "require_tests": get_settings().pr_reviewer.require_tests_review,
            "require_security": get_settings().pr_reviewer.require_security_review,
            "require_focused": get_settings().pr_reviewer.require_focused_review,
            "require_estimate_effort_to_review": get_settings().pr_reviewer.require_estimate_effort_to_review,
            'num_code_suggestions': get_settings().pr_reviewer.num_code_suggestions,
            'question_str': question_str,
            'answer_str': answer_str,
            "extra_instructions": get_settings().pr_reviewer.extra_instructions,
            "commit_messages_str": self.git_provider.get_commit_messages(),
            "custom_labels": "",
            "enable_custom_labels": get_settings().config.enable_custom_labels,
        }

        self.token_handler = TokenHandler(
            self.git_provider.pr,
            self.vars,
            get_settings().pr_review_prompt.system,
            get_settings().pr_review_prompt.user
        )

    def parse_args(self, args: List[str]) -> None:
        """
        Parse the arguments passed to the PRReviewer class and set the 'incremental' attribute accordingly.

        Only the first argument is inspected; "-i" enables incremental mode.

        Args:
            args: A list of arguments passed to the PRReviewer class. May be None or empty.

        Returns:
            None
        """
        is_incremental = bool(args) and args[0] == "-i"
        self.incremental = IncrementalPR(is_incremental)

    async def run(self) -> None:
        """
        Review the pull request and generate feedback.

        Any exception raised along the way is logged and swallowed, so this coroutine never raises.
        """
        try:
            if self.is_auto and not get_settings().pr_reviewer.automatic_review:
                get_logger().info(f'Automatic review is disabled {self.pr_url}')
                return None

            if self.incremental.is_incremental and not self._can_run_incremental_review():
                return None

            get_logger().info(f'Reviewing PR: {self.pr_url} ...')

            if get_settings().config.publish_output:
                self.git_provider.publish_comment("Preparing review...", is_temporary=True)

            await retry_with_fallback_models(self._prepare_prediction)

            get_logger().info('Preparing PR review...')
            pr_comment = self._prepare_pr_review()

            if get_settings().config.publish_output:
                get_logger().info('Pushing PR review...')
                # Look up the old review before publishing the new comment; it is removed only
                # after the new one has been published.
                previous_review_comment = self._get_previous_review_comment()

                self.git_provider.publish_comment(pr_comment)
                self.git_provider.remove_initial_comment()
                if previous_review_comment:
                    self._remove_previous_review_comment(previous_review_comment)

                if get_settings().pr_reviewer.inline_code_comments:
                    get_logger().info('Pushing inline code comments...')
                    self._publish_inline_code_comments()
        except Exception as e:
            get_logger().error(f"Failed to review PR: {e}")

    async def _prepare_prediction(self, model: str) -> None:
        """
        Prepare the AI prediction for the pull request review.

        Stores the PR diff in `self.patches_diff` and the model response in `self.prediction`.

        Args:
            model: A string representing the AI model to be used for the prediction.

        Returns:
            None
        """
        get_logger().info('Getting PR diff...')
        self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)
        get_logger().info('Getting AI prediction...')
        self.prediction = await self._get_prediction(model)

    async def _get_prediction(self, model: str) -> str:
        """
        Generate an AI prediction for the pull request review.

        Args:
            model: A string representing the AI model to be used for the prediction.

        Returns:
            A string representing the AI prediction for the pull request review.
        """
        # Work on a copy so self.vars keeps its empty "diff" placeholder.
        variables = copy.deepcopy(self.vars)
        variables["diff"] = self.patches_diff  # update diff

        # StrictUndefined makes rendering fail on any template variable missing from `variables`.
        environment = Environment(undefined=StrictUndefined)
        set_custom_labels(variables)
        system_prompt = environment.from_string(get_settings().pr_review_prompt.system).render(variables)
        user_prompt = environment.from_string(get_settings().pr_review_prompt.user).render(variables)

        if get_settings().config.verbosity_level >= 2:
            get_logger().info(f"\nSystem prompt:\n{system_prompt}")
            get_logger().info(f"\nUser prompt:\n{user_prompt}")

        # The finish reason returned alongside the response is not used here.
        response, _ = await self.ai_handler.chat_completion(
            model=model,
            temperature=0.2,
            system=system_prompt,
            user=user_prompt
        )

        return response

    def _prepare_pr_review(self) -> str:
        """
        Prepare the PR review by processing the AI prediction and generating a markdown-formatted text that summarizes
        the feedback.

        Returns:
            The markdown text of the review; an empty string if the conversion produced nothing.
        """
        data = load_yaml(self.prediction.strip())

        # Move 'Security concerns' key to 'PR Analysis' section for better display
        pr_feedback = data.get('PR Feedback', {})
        security_concerns = pr_feedback.get('Security concerns')
        if security_concerns is not None:
            del pr_feedback['Security concerns']
            if security_concerns is False:
                # Only a literal boolean False is replaced with the friendly message.
                security_concerns = 'No security concerns found'
            data.setdefault('PR Analysis', {})['Security concerns'] = security_concerns

        if 'Code feedback' in pr_feedback:
            code_feedback = pr_feedback['Code feedback']

            # Filter out code suggestions that can be submitted as inline comments
            if get_settings().pr_reviewer.inline_code_comments:
                del pr_feedback['Code feedback']
            else:
                # `or []` tolerates a 'Code feedback' key whose value is null.
                for suggestion in code_feedback or []:
                    if ('relevant file' in suggestion) and (not suggestion['relevant file'].startswith('``')):
                        suggestion['relevant file'] = f"``{suggestion['relevant file']}``"

                    if 'relevant line' not in suggestion:
                        suggestion['relevant line'] = ''

                    # Keep only the first line and drop the leading diff '+' marker(s)
                    relevant_line_str = suggestion['relevant line'].split('\n')[0]
                    suggestion['relevant line'] = relevant_line_str.lstrip('+').strip()

                    # try to add line numbers link to code suggestions
                    if hasattr(self.git_provider, 'generate_link_to_relevant_line_number'):
                        link = self.git_provider.generate_link_to_relevant_line_number(suggestion)
                        if link:
                            suggestion['relevant line'] = f"[{suggestion['relevant line']}]({link})"

        # Add incremental review section
        if self.incremental.is_incremental:
            last_commit_url = f"{self.git_provider.get_pr_url()}/commits/" \
                              f"{self.git_provider.incremental.first_new_commit_sha}"
            last_commit_msg = self.incremental.commits_range[0].commit.message if self.incremental.commits_range else ""
            incremental_review_markdown_text = f"Starting from commit {last_commit_url}"
            if last_commit_msg:
                # Use the first line of the commit message, escaping '_' since it is wrapped in _..._ below.
                replacement = last_commit_msg.splitlines(keepends=False)[0].replace('_', r'\_')
                incremental_review_markdown_text += f"  \n_({replacement})_"
            data = OrderedDict(data)
            data.update({'Incremental PR Review': {
                "⏮️ Review for commits since previous PR-Agent review": incremental_review_markdown_text}})
            # Show the incremental section first.
            data.move_to_end('Incremental PR Review', last=False)

        # Normalize before any concatenation so an empty/None conversion result cannot break `+=` below.
        markdown_text = convert_to_markdown(data, self.git_provider.is_supported("gfm_markdown")) or ""
        user = self.git_provider.get_user_id()

        # Add help text if not in CLI mode
        if not get_settings().get("CONFIG.CLI_MODE", False):
            markdown_text += "\n### How to use\n"
            bot_user = "[bot]" if get_settings().github_app.override_deployment_type else get_settings().github_app.bot_user
            if user and bot_user not in user:
                markdown_text += bot_help_text(user)
            else:
                markdown_text += actions_help_text

        # Log markdown response if verbosity level is high
        if get_settings().config.verbosity_level >= 2:
            get_logger().info(f"Markdown response:\n{markdown_text}")

        return markdown_text

    def _publish_inline_code_comments(self) -> None:
        """
        Publishes inline comments on a pull request with code suggestions generated by the AI model.

        Does nothing when `pr_reviewer.num_code_suggestions` is 0 or when the prediction cannot be
        parsed into a mapping. Suggestions missing a file, line or content are skipped.
        """
        if get_settings().pr_reviewer.num_code_suggestions == 0:
            return

        review_text = self.prediction.strip()
        # Strip a surrounding ```yaml ... ``` markdown fence, if present.
        review_text = review_text.removeprefix('```yaml').rstrip('`')
        try:
            data = yaml.load(review_text, Loader=SafeLoader)
        except Exception as e:
            get_logger().error(f"Failed to parse AI prediction: {e}")
            data = try_fix_yaml(review_text)

        # yaml.load returns None for an empty document (and a scalar for plain text);
        # without a mapping there are no suggestions to publish.
        if not isinstance(data, dict):
            get_logger().info("Skipping inline comments: AI prediction is not a YAML mapping")
            return

        # The provider capability does not change between suggestions, so query it once.
        supports_batch = self.git_provider.is_supported("create_inline_comment")

        comments: List[str] = []
        code_feedback = (data.get('PR Feedback') or {}).get('Code feedback') or []
        for suggestion in code_feedback:
            relevant_file = suggestion.get('relevant file', '').strip()
            relevant_line_in_file = suggestion.get('relevant line', '').strip()
            content = suggestion.get('suggestion', '')
            if not relevant_file or not relevant_line_in_file or not content:
                get_logger().info("Skipping inline comment with missing file/line/content")
                continue

            if supports_batch:
                # Collect comments and publish them together after the loop.
                comment = self.git_provider.create_inline_comment(content, relevant_file, relevant_line_in_file)
                if comment:
                    comments.append(comment)
            else:
                self.git_provider.publish_inline_comment(content, relevant_file, relevant_line_in_file)

        if comments:
            self.git_provider.publish_inline_comments(comments)

    def _get_user_answers(self) -> Tuple[str, str]:
        """
        Retrieves the question and answer strings from the discussion messages related to a pull request.

        Messages are scanned via the `reversed` attribute of the provider's issue comments; the first
        message found containing the questions marker and the first containing '/answer' are used.

        Returns:
            A tuple `(question_str, answer_str)`. Both are empty strings when not in answer mode
            or when no matching message is found.
        """
        question_str = ""
        answer_str = ""

        if self.is_answer:
            discussion_messages = self.git_provider.get_issue_comments()

            for message in discussion_messages.reversed:
                if "Questions to better understand the PR:" in message.body:
                    question_str = message.body
                elif '/answer' in message.body:
                    answer_str = message.body

                # Stop as soon as both pieces have been found.
                if answer_str and question_str:
                    break

        return question_str, answer_str

    def _get_previous_review_comment(self):
        """
        Get the previous review comment if it exists.

        Returns:
            Whatever the provider's `get_previous_review` returns, or None when the feature is disabled,
            the provider has no `get_previous_review`, or the lookup raised (the error is logged).
        """
        try:
            if get_settings().pr_reviewer.remove_previous_review_comment and hasattr(self.git_provider, "get_previous_review"):
                return self.git_provider.get_previous_review(
                    full=not self.incremental.is_incremental,
                    incremental=self.incremental.is_incremental,
                )
        except Exception as e:
            get_logger().exception(f"Failed to get previous review comment, error: {e}")
        return None

    def _remove_previous_review_comment(self, comment):
        """
        Remove the previous review comment if it exists.

        Args:
            comment: The comment object to remove; nothing happens when it is falsy
                or when `pr_reviewer.remove_previous_review_comment` is disabled.
        """
        try:
            if get_settings().pr_reviewer.remove_previous_review_comment and comment:
                self.git_provider.remove_comment(comment)
        except Exception as e:
            get_logger().exception(f"Failed to remove previous review comment, error: {e}")

    def _can_run_incremental_review(self) -> bool:
        """
        Checks if we can run incremental review according the various configurations and previous review.

        Returns:
            True if the incremental review should run, False otherwise (the reason is logged).
        """
        # checking if running is auto mode but there are no new commits
        if self.is_auto and not self.incremental.first_new_commit_sha:
            get_logger().info(f"Incremental review is enabled for {self.pr_url} but there are no new commits")
            return False

        # checking if there are enough commits to start the review
        # (`or []` because commits_range may be unset/falsy, as _prepare_pr_review also assumes)
        num_new_commits = len(self.incremental.commits_range or [])
        num_commits_threshold = get_settings().pr_reviewer.minimal_commits_for_incremental_review
        not_enough_commits = num_new_commits < num_commits_threshold

        # checking if the commits are not too recent to start the review
        # NOTE(review): now() is naive local time; this presumes the commit date is a comparable
        # naive datetime in the same timezone - confirm against the git provider.
        recent_commits_threshold = datetime.datetime.now() - datetime.timedelta(
            minutes=get_settings().pr_reviewer.minimal_minutes_for_incremental_review
        )
        last_seen_commit_date = (
            self.incremental.last_seen_commit.commit.author.date if self.incremental.last_seen_commit else None
        )
        all_commits_too_recent = (
            last_seen_commit_date > recent_commits_threshold if self.incremental.last_seen_commit else False
        )

        # check all the thresholds or just one to start the review:
        # when all thresholds are required, any single failure blocks the review;
        # otherwise the review is blocked only if every threshold fails.
        condition = any if get_settings().pr_reviewer.require_all_thresholds_for_incremental_review else all
        if condition((not_enough_commits, all_commits_too_recent)):
            get_logger().info(
                f"Incremental review is enabled for {self.pr_url} but didn't pass the threshold check to run:"
                f"\n* Number of new commits = {num_new_commits} (threshold is {num_commits_threshold})"
                f"\n* Last seen commit date = {last_seen_commit_date} (threshold is {recent_commits_threshold})"
            )
            return False
        return True