From 7db9a038055e37f10aadfaa4cd53a276a9b4b4a2 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Thu, 10 Oct 2024 08:53:07 +0300 Subject: [PATCH] feat: integrate ticket extraction and enhance YAML handling in PR tools - Add ticket extraction and caching functionality in `pr_description.py` and `pr_reviewer.py`. - Introduce `keys_fix` parameter to improve YAML loading robustness. - Enhance error handling for estimated effort parsing in `pr_reviewer.py`. --- pr_agent/tools/pr_description.py | 21 ++++++++++++++------- pr_agent/tools/pr_reviewer.py | 21 ++++++++++++++++++--- 2 files changed, 32 insertions(+), 10 deletions(-) diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py index 9f7d79d3..c965d84e 100644 --- a/pr_agent/tools/pr_description.py +++ b/pr_agent/tools/pr_description.py @@ -20,6 +20,8 @@ from pr_agent.git_providers import get_git_provider, GithubProvider, get_git_pro from pr_agent.git_providers.git_provider import get_main_pr_language from pr_agent.log import get_logger from pr_agent.servers.help import HelpMessage +from pr_agent.tools.ticket_pr_compliance_check import extract_ticket_links_from_pr_description, extract_tickets, \ + extract_and_cache_pr_tickets class PRDescription: @@ -38,6 +40,7 @@ class PRDescription: self.git_provider.get_languages(), self.git_provider.get_files() ) self.pr_id = self.git_provider.get_pr_id() + self.keys_fix = ["filename:", "language:", "changes_summary:", "changes_title:", "description:", "title:"] if get_settings().pr_description.enable_semantic_files_types and not self.git_provider.is_supported( "gfm_markdown"): @@ -60,6 +63,7 @@ class PRDescription: "enable_custom_labels": get_settings().config.enable_custom_labels, "custom_labels_class": "", # will be filled if necessary in 'set_custom_labels' function "enable_semantic_files_types": get_settings().pr_description.enable_semantic_files_types, + "related_tickets": "", } self.user_description = self.git_provider.get_user_description() @@ -87,6 +91,9 @@ class PRDescription: if get_settings().config.publish_output and not get_settings().config.get('is_auto_command', False): self.git_provider.publish_comment("Preparing PR description...", is_temporary=True) + # ticket extraction if exists + await extract_and_cache_pr_tickets(self.git_provider, self.vars) + await retry_with_fallback_models(self._prepare_prediction, ModelType.TURBO) if self.prediction: @@ -226,7 +233,7 @@ class PRDescription: file_description_str_list = [] for i, result in enumerate(results): prediction_files = result.strip().removeprefix('```yaml').strip('`').strip() - if load_yaml(prediction_files) and prediction_files.startswith('pr_files'): + if load_yaml(prediction_files, keys_fix_yaml=self.keys_fix) and prediction_files.startswith('pr_files'): prediction_files = prediction_files.removeprefix('pr_files:').strip() file_description_str_list.append(prediction_files) else: @@ -304,16 +311,16 @@ extra_file_yaml = # final processing self.prediction = prediction_headers + "\n" + "pr_files:\n" + files_walkthrough - if not load_yaml(self.prediction): + if not load_yaml(self.prediction, keys_fix_yaml=self.keys_fix): get_logger().error(f"Error getting valid YAML in large PR handling for describe {self.pr_id}") - if load_yaml(prediction_headers): + if load_yaml(prediction_headers, keys_fix_yaml=self.keys_fix): get_logger().debug(f"Using only headers for describe {self.pr_id}") self.prediction = prediction_headers async def extend_additional_files(self, remaining_files_list) -> str: prediction = self.prediction try: - original_prediction_dict = load_yaml(self.prediction) + original_prediction_dict = load_yaml(self.prediction, keys_fix_yaml=self.keys_fix) prediction_extra = "pr_files:" for file in remaining_files_list: extra_file_yaml = f"""\ @@ -327,12 +334,12 @@ extra_file_yaml = additional files (token-limit) """ prediction_extra = prediction_extra + "\n" + extra_file_yaml.strip() - prediction_extra_dict = load_yaml(prediction_extra) + prediction_extra_dict = load_yaml(prediction_extra, keys_fix_yaml=self.keys_fix) # merge the two dictionaries if isinstance(original_prediction_dict, dict) and isinstance(prediction_extra_dict, dict): original_prediction_dict["pr_files"].extend(prediction_extra_dict["pr_files"]) new_yaml = yaml.dump(original_prediction_dict) - if load_yaml(new_yaml): + if load_yaml(new_yaml, keys_fix_yaml=self.keys_fix): prediction = new_yaml return prediction except Exception as e: @@ -361,7 +368,7 @@ extra_file_yaml = def _prepare_data(self): # Load the AI prediction data into a dictionary - self.data = load_yaml(self.prediction.strip()) + self.data = load_yaml(self.prediction.strip(), keys_fix_yaml=self.keys_fix) if get_settings().pr_description.add_original_user_description and self.user_description: self.data["User Description"] = self.user_description diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index 88799d98..f5f82e06 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -1,5 +1,6 @@ import copy import datetime +import traceback from collections import OrderedDict from functools import partial from typing import List, Tuple @@ -15,6 +16,7 @@ from pr_agent.git_providers import get_git_provider, get_git_provider_with_conte from pr_agent.git_providers.git_provider import IncrementalPR, get_main_pr_language from pr_agent.log import get_logger from pr_agent.servers.help import HelpMessage +from pr_agent.tools.ticket_pr_compliance_check import extract_tickets, extract_and_cache_pr_tickets class PRReviewer: @@ -84,6 +86,7 @@ class PRReviewer: "custom_labels": "", "enable_custom_labels": get_settings().config.enable_custom_labels, "is_ai_metadata": get_settings().get("config.enable_ai_metadata", False), + "related_tickets": get_settings().get('related_tickets', []), } self.token_handler = TokenHandler( @@ -121,6 +124,9 @@ class PRReviewer: 'config': dict(get_settings().config)} get_logger().debug("Relevant configs", artifacts=relevant_configs) + # ticket extraction if exists + await extract_and_cache_pr_tickets(self.git_provider, self.vars) + if self.incremental.is_incremental and hasattr(self.git_provider, "unreviewed_files_set") and not self.git_provider.unreviewed_files_set: get_logger().info(f"Incremental review is enabled for {self.pr_url} but there are no new files") previous_review_url = "" @@ -207,7 +213,7 @@ class PRReviewer: first_key = 'review' last_key = 'security_concerns' data = load_yaml(self.prediction.strip(), - keys_fix_yaml=["estimated_effort_to_review_[1-5]:", "security_concerns:", "key_issues_to_review:", + keys_fix_yaml=["ticket_compliance_check", "estimated_effort_to_review_[1-5]:", "security_concerns:", "key_issues_to_review:", "relevant_file:", "relevant_line:", "suggestion:"], first_key=first_key, last_key=last_key) github_action_output(data, 'review') @@ -282,7 +288,7 @@ class PRReviewer: first_key = 'review' last_key = 'security_concerns' data = load_yaml(self.prediction.strip(), - keys_fix_yaml=["estimated_effort_to_review_[1-5]:", "security_concerns:", "key_issues_to_review:", + keys_fix_yaml=["ticket_compliance_check", "estimated_effort_to_review_[1-5]:", "security_concerns:", "key_issues_to_review:", "relevant_file:", "relevant_line:", "suggestion:"], first_key=first_key, last_key=last_key) comments: List[str] = [] @@ -401,7 +407,16 @@ class PRReviewer: review_labels = [] if get_settings().pr_reviewer.enable_review_labels_effort: estimated_effort = data['review']['estimated_effort_to_review_[1-5]'] - estimated_effort_number = int(estimated_effort.split(',')[0]) + estimated_effort_number = 0 + if isinstance(estimated_effort, str): + try: + estimated_effort_number = int(estimated_effort.split(',')[0]) + except ValueError: + get_logger().warning(f"Invalid estimated_effort value: {estimated_effort}") + elif isinstance(estimated_effort, int): + estimated_effort_number = estimated_effort + else: + get_logger().warning(f"Unexpected type for estimated_effort: {type(estimated_effort)}") if 1 <= estimated_effort_number <= 5: # 1, because ... review_labels.append(f'Review effort [1-5]: {estimated_effort_number}') if get_settings().pr_reviewer.enable_review_labels_security and get_settings().pr_reviewer.require_security_review: