feat: integrate ticket extraction and enhance YAML handling in PR tools

- Add ticket extraction and caching functionality in `pr_description.py` and `pr_reviewer.py`.
- Introduce `keys_fix` parameter to improve YAML loading robustness.
- Enhance error handling for estimated effort parsing in `pr_reviewer.py`.
This commit is contained in:
mrT23
2024-10-10 08:53:07 +03:00
parent 4eef3e9190
commit 7db9a03805
2 changed files with 32 additions and 10 deletions

View File

@ -20,6 +20,8 @@ from pr_agent.git_providers import get_git_provider, GithubProvider, get_git_pro
from pr_agent.git_providers.git_provider import get_main_pr_language from pr_agent.git_providers.git_provider import get_main_pr_language
from pr_agent.log import get_logger from pr_agent.log import get_logger
from pr_agent.servers.help import HelpMessage from pr_agent.servers.help import HelpMessage
from pr_agent.tools.ticket_pr_compliance_check import extract_ticket_links_from_pr_description, extract_tickets, \
extract_and_cache_pr_tickets
class PRDescription: class PRDescription:
@ -38,6 +40,7 @@ class PRDescription:
self.git_provider.get_languages(), self.git_provider.get_files() self.git_provider.get_languages(), self.git_provider.get_files()
) )
self.pr_id = self.git_provider.get_pr_id() self.pr_id = self.git_provider.get_pr_id()
self.keys_fix = ["filename:", "language:", "changes_summary:", "changes_title:", "description:", "title:"]
if get_settings().pr_description.enable_semantic_files_types and not self.git_provider.is_supported( if get_settings().pr_description.enable_semantic_files_types and not self.git_provider.is_supported(
"gfm_markdown"): "gfm_markdown"):
@ -60,6 +63,7 @@ class PRDescription:
"enable_custom_labels": get_settings().config.enable_custom_labels, "enable_custom_labels": get_settings().config.enable_custom_labels,
"custom_labels_class": "", # will be filled if necessary in 'set_custom_labels' function "custom_labels_class": "", # will be filled if necessary in 'set_custom_labels' function
"enable_semantic_files_types": get_settings().pr_description.enable_semantic_files_types, "enable_semantic_files_types": get_settings().pr_description.enable_semantic_files_types,
"related_tickets": "",
} }
self.user_description = self.git_provider.get_user_description() self.user_description = self.git_provider.get_user_description()
@ -87,6 +91,9 @@ class PRDescription:
if get_settings().config.publish_output and not get_settings().config.get('is_auto_command', False): if get_settings().config.publish_output and not get_settings().config.get('is_auto_command', False):
self.git_provider.publish_comment("Preparing PR description...", is_temporary=True) self.git_provider.publish_comment("Preparing PR description...", is_temporary=True)
# ticket extraction if exists
await extract_and_cache_pr_tickets(self.git_provider, self.vars)
await retry_with_fallback_models(self._prepare_prediction, ModelType.TURBO) await retry_with_fallback_models(self._prepare_prediction, ModelType.TURBO)
if self.prediction: if self.prediction:
@ -226,7 +233,7 @@ class PRDescription:
file_description_str_list = [] file_description_str_list = []
for i, result in enumerate(results): for i, result in enumerate(results):
prediction_files = result.strip().removeprefix('```yaml').strip('`').strip() prediction_files = result.strip().removeprefix('```yaml').strip('`').strip()
if load_yaml(prediction_files) and prediction_files.startswith('pr_files'): if load_yaml(prediction_files, keys_fix_yaml=self.keys_fix) and prediction_files.startswith('pr_files'):
prediction_files = prediction_files.removeprefix('pr_files:').strip() prediction_files = prediction_files.removeprefix('pr_files:').strip()
file_description_str_list.append(prediction_files) file_description_str_list.append(prediction_files)
else: else:
@ -304,16 +311,16 @@ extra_file_yaml =
# final processing # final processing
self.prediction = prediction_headers + "\n" + "pr_files:\n" + files_walkthrough self.prediction = prediction_headers + "\n" + "pr_files:\n" + files_walkthrough
if not load_yaml(self.prediction): if not load_yaml(self.prediction, keys_fix_yaml=self.keys_fix):
get_logger().error(f"Error getting valid YAML in large PR handling for describe {self.pr_id}") get_logger().error(f"Error getting valid YAML in large PR handling for describe {self.pr_id}")
if load_yaml(prediction_headers): if load_yaml(prediction_headers, keys_fix_yaml=self.keys_fix):
get_logger().debug(f"Using only headers for describe {self.pr_id}") get_logger().debug(f"Using only headers for describe {self.pr_id}")
self.prediction = prediction_headers self.prediction = prediction_headers
async def extend_additional_files(self, remaining_files_list) -> str: async def extend_additional_files(self, remaining_files_list) -> str:
prediction = self.prediction prediction = self.prediction
try: try:
original_prediction_dict = load_yaml(self.prediction) original_prediction_dict = load_yaml(self.prediction, keys_fix_yaml=self.keys_fix)
prediction_extra = "pr_files:" prediction_extra = "pr_files:"
for file in remaining_files_list: for file in remaining_files_list:
extra_file_yaml = f"""\ extra_file_yaml = f"""\
@ -327,12 +334,12 @@ extra_file_yaml =
additional files (token-limit) additional files (token-limit)
""" """
prediction_extra = prediction_extra + "\n" + extra_file_yaml.strip() prediction_extra = prediction_extra + "\n" + extra_file_yaml.strip()
prediction_extra_dict = load_yaml(prediction_extra) prediction_extra_dict = load_yaml(prediction_extra, keys_fix_yaml=self.keys_fix)
# merge the two dictionaries # merge the two dictionaries
if isinstance(original_prediction_dict, dict) and isinstance(prediction_extra_dict, dict): if isinstance(original_prediction_dict, dict) and isinstance(prediction_extra_dict, dict):
original_prediction_dict["pr_files"].extend(prediction_extra_dict["pr_files"]) original_prediction_dict["pr_files"].extend(prediction_extra_dict["pr_files"])
new_yaml = yaml.dump(original_prediction_dict) new_yaml = yaml.dump(original_prediction_dict)
if load_yaml(new_yaml): if load_yaml(new_yaml, keys_fix_yaml=self.keys_fix):
prediction = new_yaml prediction = new_yaml
return prediction return prediction
except Exception as e: except Exception as e:
@ -361,7 +368,7 @@ extra_file_yaml =
def _prepare_data(self): def _prepare_data(self):
# Load the AI prediction data into a dictionary # Load the AI prediction data into a dictionary
self.data = load_yaml(self.prediction.strip()) self.data = load_yaml(self.prediction.strip(), keys_fix_yaml=self.keys_fix)
if get_settings().pr_description.add_original_user_description and self.user_description: if get_settings().pr_description.add_original_user_description and self.user_description:
self.data["User Description"] = self.user_description self.data["User Description"] = self.user_description

View File

@ -1,5 +1,6 @@
import copy import copy
import datetime import datetime
import traceback
from collections import OrderedDict from collections import OrderedDict
from functools import partial from functools import partial
from typing import List, Tuple from typing import List, Tuple
@ -15,6 +16,7 @@ from pr_agent.git_providers import get_git_provider, get_git_provider_with_conte
from pr_agent.git_providers.git_provider import IncrementalPR, get_main_pr_language from pr_agent.git_providers.git_provider import IncrementalPR, get_main_pr_language
from pr_agent.log import get_logger from pr_agent.log import get_logger
from pr_agent.servers.help import HelpMessage from pr_agent.servers.help import HelpMessage
from pr_agent.tools.ticket_pr_compliance_check import extract_tickets, extract_and_cache_pr_tickets
class PRReviewer: class PRReviewer:
@ -84,6 +86,7 @@ class PRReviewer:
"custom_labels": "", "custom_labels": "",
"enable_custom_labels": get_settings().config.enable_custom_labels, "enable_custom_labels": get_settings().config.enable_custom_labels,
"is_ai_metadata": get_settings().get("config.enable_ai_metadata", False), "is_ai_metadata": get_settings().get("config.enable_ai_metadata", False),
"related_tickets": get_settings().get('related_tickets', []),
} }
self.token_handler = TokenHandler( self.token_handler = TokenHandler(
@ -121,6 +124,9 @@ class PRReviewer:
'config': dict(get_settings().config)} 'config': dict(get_settings().config)}
get_logger().debug("Relevant configs", artifacts=relevant_configs) get_logger().debug("Relevant configs", artifacts=relevant_configs)
# ticket extraction if exists
await extract_and_cache_pr_tickets(self.git_provider, self.vars)
if self.incremental.is_incremental and hasattr(self.git_provider, "unreviewed_files_set") and not self.git_provider.unreviewed_files_set: if self.incremental.is_incremental and hasattr(self.git_provider, "unreviewed_files_set") and not self.git_provider.unreviewed_files_set:
get_logger().info(f"Incremental review is enabled for {self.pr_url} but there are no new files") get_logger().info(f"Incremental review is enabled for {self.pr_url} but there are no new files")
previous_review_url = "" previous_review_url = ""
@ -207,7 +213,7 @@ class PRReviewer:
first_key = 'review' first_key = 'review'
last_key = 'security_concerns' last_key = 'security_concerns'
data = load_yaml(self.prediction.strip(), data = load_yaml(self.prediction.strip(),
keys_fix_yaml=["estimated_effort_to_review_[1-5]:", "security_concerns:", "key_issues_to_review:", keys_fix_yaml=["ticket_compliance_check", "estimated_effort_to_review_[1-5]:", "security_concerns:", "key_issues_to_review:",
"relevant_file:", "relevant_line:", "suggestion:"], "relevant_file:", "relevant_line:", "suggestion:"],
first_key=first_key, last_key=last_key) first_key=first_key, last_key=last_key)
github_action_output(data, 'review') github_action_output(data, 'review')
@ -282,7 +288,7 @@ class PRReviewer:
first_key = 'review' first_key = 'review'
last_key = 'security_concerns' last_key = 'security_concerns'
data = load_yaml(self.prediction.strip(), data = load_yaml(self.prediction.strip(),
keys_fix_yaml=["estimated_effort_to_review_[1-5]:", "security_concerns:", "key_issues_to_review:", keys_fix_yaml=["ticket_compliance_check", "estimated_effort_to_review_[1-5]:", "security_concerns:", "key_issues_to_review:",
"relevant_file:", "relevant_line:", "suggestion:"], "relevant_file:", "relevant_line:", "suggestion:"],
first_key=first_key, last_key=last_key) first_key=first_key, last_key=last_key)
comments: List[str] = [] comments: List[str] = []
@ -401,7 +407,16 @@ class PRReviewer:
review_labels = [] review_labels = []
if get_settings().pr_reviewer.enable_review_labels_effort: if get_settings().pr_reviewer.enable_review_labels_effort:
estimated_effort = data['review']['estimated_effort_to_review_[1-5]'] estimated_effort = data['review']['estimated_effort_to_review_[1-5]']
estimated_effort_number = int(estimated_effort.split(',')[0]) estimated_effort_number = 0
if isinstance(estimated_effort, str):
try:
estimated_effort_number = int(estimated_effort.split(',')[0])
except ValueError:
get_logger().warning(f"Invalid estimated_effort value: {estimated_effort}")
elif isinstance(estimated_effort, int):
estimated_effort_number = estimated_effort
else:
get_logger().warning(f"Unexpected type for estimated_effort: {type(estimated_effort)}")
if 1 <= estimated_effort_number <= 5: # 1, because ... if 1 <= estimated_effort_number <= 5: # 1, because ...
review_labels.append(f'Review effort [1-5]: {estimated_effort_number}') review_labels.append(f'Review effort [1-5]: {estimated_effort_number}')
if get_settings().pr_reviewer.enable_review_labels_security and get_settings().pr_reviewer.require_security_review: if get_settings().pr_reviewer.enable_review_labels_security and get_settings().pr_reviewer.require_security_review: