Files
pr-agent/pr_agent/tools/pr_description.py

257 lines
10 KiB
Python
Raw Normal View History

2023-07-13 17:24:56 +03:00
import copy
import json
2023-09-04 12:11:39 -04:00
import re
2023-07-13 17:24:56 +03:00
import logging
from typing import List, Tuple
2023-07-13 17:24:56 +03:00
from jinja2 import Environment, StrictUndefined
from pr_agent.algo.ai_handler import AiHandler
2023-07-23 16:16:36 +03:00
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
2023-07-13 17:24:56 +03:00
from pr_agent.algo.token_handler import TokenHandler
2023-08-09 08:50:15 +03:00
from pr_agent.algo.utils import load_yaml
from pr_agent.config_loader import get_settings
2023-07-13 17:24:56 +03:00
from pr_agent.git_providers import get_git_provider
from pr_agent.git_providers.git_provider import get_main_pr_language
class PRDescription:
    """
    Generate a pull-request description with an AI model and publish it through the git provider.
    """

    def __init__(self, pr_url: str, args: list = None):
        """
        Initialize the PRDescription object with the necessary attributes and objects for generating a PR description
        using an AI model.

        Args:
            pr_url (str): The URL of the pull request.
            args (list, optional): List of arguments passed to the PRDescription class. Defaults to None.
                Not read by this class.
        """
        # Initialize the git provider and main PR language
        self.git_provider = get_git_provider()(pr_url)
        self.main_pr_language = get_main_pr_language(
            self.git_provider.get_languages(), self.git_provider.get_files()
        )
        # Identifier used only to tag log messages
        self.pr_id = f"{self.git_provider.repo}/{self.git_provider.pr_num}"

        # Initialize the AI handler
        self.ai_handler = AiHandler()

        # Initialize the variables dictionary (rendered into the prompt templates)
        self.vars = {
            "title": self.git_provider.pr.title,
            "branch": self.git_provider.get_pr_branch(),
            "description": self.git_provider.get_pr_description(full=False),
            "language": self.main_pr_language,
            "diff": "",  # empty diff for initial calculation
            "extra_instructions": get_settings().pr_description.extra_instructions,
            "commit_messages_str": self.git_provider.get_commit_messages()
        }

        self.user_description = self.git_provider.get_user_description()

        # Initialize the token handler
        self.token_handler = TokenHandler(
            self.git_provider.pr,
            self.vars,
            get_settings().pr_description_prompt.system,
            get_settings().pr_description_prompt.user,
        )

        # Initialize patches_diff and prediction attributes
        self.patches_diff = None
        self.prediction = None

    async def run(self):
        """
        Generates a PR description using an AI model and publishes it to the PR.

        Returns:
            None when no prediction was produced, otherwise an empty string. Any exception raised while
            generating or publishing is logged and swallowed.
        """
        try:
            logging.info(f"Generating a PR description {self.pr_id}")
            if get_settings().config.publish_output:
                self.git_provider.publish_comment("Preparing pr description...", is_temporary=True)

            await retry_with_fallback_models(self._prepare_prediction)

            logging.info(f"Preparing answer {self.pr_id}")
            if not self.prediction:
                # NOTE(review): this early return skips remove_initial_comment(), so the temporary comment
                # published above presumably stays on the PR - confirm whether that is intended.
                return None
            self._prepare_data()

            pr_labels = []
            if get_settings().pr_description.publish_labels:
                pr_labels = self._prepare_labels()

            if get_settings().pr_description.use_description_markers:
                pr_title, pr_body = self._prepare_pr_answer_with_markers()
            else:
                pr_title, pr_body = self._prepare_pr_answer()
            full_markdown_description = f"## Title\n\n{pr_title}\n\n___\n{pr_body}"

            if get_settings().config.publish_output:
                logging.info(f"Pushing answer {self.pr_id}")
                if get_settings().pr_description.publish_description_as_comment:
                    self.git_provider.publish_comment(full_markdown_description)
                else:
                    self.git_provider.publish_description(pr_title, pr_body)
                    if get_settings().pr_description.publish_labels and self.git_provider.is_supported("get_labels"):
                        # Keep the labels already on the PR and add the predicted ones
                        current_labels = self.git_provider.get_labels()
                        if current_labels is None:
                            current_labels = []
                        self.git_provider.publish_labels(pr_labels + current_labels)
                self.git_provider.remove_initial_comment()
        except Exception as e:
            logging.error(f"Error generating PR description {self.pr_id}: {e}")

        return ""

    async def _prepare_prediction(self, model: str) -> None:
        """
        Prepare the AI prediction for the PR description based on the provided model.

        Args:
            model (str): The name of the model to be used for generating the prediction.

        Returns:
            None

        Raises:
            Any exceptions raised by the 'get_pr_diff' and '_get_prediction' functions.
        """
        # In marker mode there is nothing to do unless the user description contains a 'pr_agent:' marker
        if get_settings().pr_description.use_description_markers and 'pr_agent:' not in self.user_description:
            return None

        logging.info(f"Getting PR diff {self.pr_id}")
        self.patches_diff = get_pr_diff(self.git_provider, self.token_handler, model)
        logging.info(f"Getting AI prediction {self.pr_id}")
        self.prediction = await self._get_prediction(model)

    async def _get_prediction(self, model: str) -> str:
        """
        Generate an AI prediction for the PR description based on the provided model.

        Args:
            model (str): The name of the model to be used for generating the prediction.

        Returns:
            str: The generated AI prediction.
        """
        # Work on a copy so the shared template variables keep their empty 'diff'
        variables = copy.deepcopy(self.vars)
        variables["diff"] = self.patches_diff  # update diff

        # StrictUndefined makes rendering fail on any template variable missing from 'variables'
        environment = Environment(undefined=StrictUndefined)
        system_prompt = environment.from_string(get_settings().pr_description_prompt.system).render(variables)
        user_prompt = environment.from_string(get_settings().pr_description_prompt.user).render(variables)

        if get_settings().config.verbosity_level >= 2:
            logging.info(f"\nSystem prompt:\n{system_prompt}")
            logging.info(f"\nUser prompt:\n{user_prompt}")

        # finish_reason is returned by the handler but not used here
        response, finish_reason = await self.ai_handler.chat_completion(
            model=model,
            temperature=0.2,
            system=system_prompt,
            user=user_prompt
        )

        return response

    def _prepare_data(self):
        """
        Parse the AI prediction with load_yaml into self.data and, when configured and available,
        add the original user description under the 'User Description' key.
        """
        # Load the AI prediction data into a dictionary
        self.data = load_yaml(self.prediction.strip())

        if get_settings().pr_description.add_original_user_description and self.user_description:
            self.data["User Description"] = self.user_description

    def _prepare_labels(self) -> List[str]:
        """
        Extract the PR labels from the 'PR Type' entry of the prediction data.

        The entry may be a list or a comma-separated string. Surrounding whitespace is stripped from
        string labels and empty entries are dropped, so "Bug fix, Enhancement" yields
        ['Bug fix', 'Enhancement'].

        Returns:
            List[str]: The labels, or an empty list when 'PR Type' is missing or of another type.
        """
        if 'PR Type' not in self.data:
            return []

        raw_types = self.data['PR Type']
        if isinstance(raw_types, str):
            candidates = raw_types.split(',')
        elif isinstance(raw_types, list):
            candidates = raw_types
        else:
            return []

        cleaned = (item.strip() if isinstance(item, str) else item for item in candidates)
        return [label for label in cleaned if label]

    def _prepare_pr_answer_with_markers(self) -> Tuple[str, str]:
        """
        Build the PR answer by substituting the 'pr_agent:summary' and 'pr_agent:walkthrough' markers
        in the user description with the AI-generated content.

        A marker wrapped in an HTML comment (e.g. '<!-- pr_agent:summary -->') disables the
        corresponding replacement.

        Returns:
            - title: the original PR title.
            - body: the user description with the markers replaced.
        """
        logging.info(f"Using description marker replacements {self.pr_id}")
        title = self.vars["title"]
        body = self.user_description
        if get_settings().pr_description.include_generated_by_header:
            ai_header = f"### 🤖 Generated by PR Agent at {self.git_provider.last_commit_id.sha}\n\n"
        else:
            ai_header = ""

        ai_summary = self.data.get('PR Description')
        if ai_summary and not re.search(r'<!--\s*pr_agent:summary\s*-->', body):
            summary = f"{ai_header}{ai_summary}"
            body = body.replace('pr_agent:summary', summary)

        if not re.search(r'<!--\s*pr_agent:walkthrough\s*-->', body):
            ai_walkthrough = self.data.get('PR Main Files Walkthrough')
            if ai_walkthrough:
                walkthrough = str(ai_header)
                for file in ai_walkthrough:
                    # Single quotes are swapped for backticks in both the filename and its description
                    filename = file['filename'].replace("'", "`")
                    description = file['changes in file'].replace("'", "`")
                    walkthrough += f'- `{filename}`: {description}\n'

                body = body.replace('pr_agent:walkthrough', walkthrough)

        return title, body

    def _prepare_pr_answer(self) -> Tuple[str, str]:
        """
        Prepare the PR description based on the AI prediction data.

        Note that the 'PR Title' entry is removed from self.data as a side effect.

        Returns:
            - title: a string containing the PR title.
            - pr_body: a string containing the PR description body in a markdown format.
        """
        # Remove the 'PR Title' key from the dictionary, falling back to the original title
        ai_title = self.data.pop('PR Title', self.vars["title"])
        if get_settings().pr_description.keep_original_user_title:
            # Assign the original PR title to the 'title' variable
            title = self.vars["title"]
        else:
            # Assign the value of the 'PR Title' key to 'title' variable
            title = ai_title

        # Render every remaining entry as its own markdown section; entries whose key contains
        # the word 'walkthrough' are rendered as a per-file list instead of a plain value
        sections = []
        for key, value in self.data.items():
            section = f"## {key}:\n"
            if 'walkthrough' in key.lower():
                # Wrap the file list in a collapsible block when the provider supports GFM markdown
                use_details = self.git_provider.is_supported("gfm_markdown")
                if use_details:
                    section += "<details> <summary>files:</summary>\n\n"
                for file in value:
                    filename = file['filename'].replace("'", "`")
                    description = file['changes in file']
                    section += f'`{filename}`: {description}\n'
                if use_details:
                    section += "</details>\n"
            else:
                # if the value is a list, join its items by comma
                if isinstance(value, list):
                    value = ', '.join(str(v) for v in value)
                section += f"{value}\n"
            sections.append(section)

        # Sections are separated (not terminated) by a horizontal rule
        pr_body = "\n___\n".join(sections)

        if get_settings().config.verbosity_level >= 2:
            logging.info(f"title:\n{title}\n{pr_body}")

        return title, pr_body