diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index ec902799..3bc91099 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -88,6 +88,7 @@ publish_description_as_comment_persistent=true ## changes walkthrough section enable_semantic_files_types=true collapsible_file_list='adaptive' # true, false, 'adaptive' +collapsible_file_list_threshold=8 inline_file_summary=false # false, true, 'table' # markers use_description_markers=false @@ -96,7 +97,6 @@ include_generated_by_header=true enable_large_pr_handling=true max_ai_calls=4 async_ai_calls=true -mention_extra_files=true #custom_labels = ['Bug fix', 'Tests', 'Bug fix with tests', 'Enhancement', 'Documentation', 'Other'] [pr_questions] # /ask # diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index 364dd9af..37c9745c 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -9,7 +9,7 @@ Your task is to provide a full description for the PR content - files walkthroug - Keep in mind that the 'Previous title', 'Previous description' and 'Commit messages' sections may be partial, simplistic, non-informative or out of date. Hence, compare them to the PR diff code, and use them only as a reference. - The generated title and description should prioritize the most significant changes. - If needed, each YAML output should be in block scalar indicator ('|-') -- When quoting variables or names from the code, use backticks (`) instead of single quote ('). +- When quoting variables, names or file paths from the code, use backticks (`) instead of single quote ('). {%- if extra_instructions %} @@ -38,23 +38,22 @@ class PRType(str, Enum): {%- if enable_semantic_files_types %} class FileDescription(BaseModel): - filename: str = Field(description="The full file path of the relevant file.") - language: str = Field(description="The programming language of the relevant file.") + filename: str = Field(description="The full file path of the relevant file") + language: str = Field(description="The programming language of the relevant file") +{%- if include_file_summary_changes %} changes_summary: str = Field(description="concise summary of the changes in the relevant file, in bullet points (1-4 bullet points).") - changes_title: str = Field(description="an informative title for the changes in the files, describing its main theme (5-10 words).") +{%- endif %} + changes_title: str = Field(description="one-line summary capturing the main theme of changes in the file") label: str = Field(description="a single semantic label that represents a type of code changes that occurred in the File. Possible values (partial list): 'bug fix', 'tests', 'enhancement', 'documentation', 'error handling', 'configuration changes', 'dependencies', 'formatting', 'miscellaneous', ...") {%- endif %} class PRDescription(BaseModel): type: List[PRType] = Field(description="one or more types that describe the PR content. Return the label member value (e.g. 'Bug fix', not 'bug_fix')") {%- if enable_semantic_files_types %} - pr_files: List[FileDescription] = Field(max_items=15, description="a list of the files in the PR, and summary of their changes") + pr_files: List[FileDescription] = Field(max_items=20, description="a list of all the files that were changed in the PR, and summary of their changes. Each file must be analyzed regardless of change size.") {%- endif %} description: str = Field(description="an informative and concise description of the PR. Use bullet points. Display first the most significant changes.") title: str = Field(description="an informative title for the PR, describing its main theme") -{%- if enable_custom_labels %} - labels: List[Label] = Field(min_items=0, description="choose the relevant custom labels that describe the PR content, and return their keys. Use the value field of the Label object to better understand the label meaning.") -{%- endif %} ===== @@ -70,25 +69,20 @@ pr_files: ... language: | ... +{%- if include_file_summary_changes %} changes_summary: | ... +{%- endif %} changes_title: | ... label: | - ... + label_key_1 ... {%- endif %} description: | ... title: | ... -{%- if enable_custom_labels %} -labels: -- | - ... -- | - ... -{%- endif %} ``` Answer should be a valid YAML, and nothing else. Each YAML output MUST be after a newline, with proper indent, and block scalar indicator ('|') diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py index 357e9470..b11c3ead 100644 --- a/pr_agent/tools/pr_description.py +++ b/pr_agent/tools/pr_description.py @@ -1,6 +1,7 @@ import asyncio import copy import re +import traceback from functools import partial from typing import List, Tuple @@ -57,6 +58,7 @@ class PRDescription: self.ai_handler.main_pr_language = self.main_pr_language # Initialize the variables dictionary + self.COLLAPSIBLE_FILE_LIST_THRESHOLD = get_settings().pr_description.get("collapsible_file_list_threshold", 8) self.vars = { "title": self.git_provider.pr.title, "branch": self.git_provider.get_pr_branch(), @@ -69,6 +71,7 @@ class PRDescription: "custom_labels_class": "", # will be filled if necessary in 'set_custom_labels' function "enable_semantic_files_types": get_settings().pr_description.enable_semantic_files_types, "related_tickets": "", + "include_file_summary_changes": len(self.git_provider.get_diff_files()) <= self.COLLAPSIBLE_FILE_LIST_THRESHOLD } self.user_description = self.git_provider.get_user_description() @@ -85,7 +88,6 @@ class PRDescription: self.patches_diff = None self.prediction = None self.file_label_dict = None - self.COLLAPSIBLE_FILE_LIST_THRESHOLD = 8 async def run(self): try: @@ -114,6 +116,8 @@ class PRDescription: pr_labels, pr_file_changes = [], [] if get_settings().pr_description.publish_labels: pr_labels = self._prepare_labels() + else: + get_logger().debug(f"Publishing labels disabled") if get_settings().pr_description.use_description_markers: pr_title, pr_body, changes_walkthrough, pr_file_changes = self._prepare_pr_answer_with_markers() @@ -137,6 +141,7 @@ class PRDescription: pr_body += show_relevant_configurations(relevant_section='pr_description') if get_settings().config.publish_output: + # publish labels if get_settings().pr_description.publish_labels and pr_labels and self.git_provider.is_supported("get_labels"): original_labels = self.git_provider.get_pr_labels(update=True) @@ -164,7 +169,7 @@ class PRDescription: self.git_provider.publish_description(pr_title, pr_body) # publish final update message - if (get_settings().pr_description.final_update_message): + if (get_settings().pr_description.final_update_message and not get_settings().config.get('is_auto_command', False)): latest_commit_url = self.git_provider.get_latest_commit_url() if latest_commit_url: pr_url = self.git_provider.get_pr_url() @@ -176,35 +181,36 @@ class PRDescription: get_settings().data = {"artifact": pr_body} return except Exception as e: - get_logger().error(f"Error generating PR description {self.pr_id}: {e}") + get_logger().error(f"Error generating PR description {self.pr_id}: {e}", + artifact={"traceback": traceback.format_exc()}) return "" async def _prepare_prediction(self, model: str) -> None: if get_settings().pr_description.use_description_markers and 'pr_agent:' not in self.user_description: - get_logger().info( - "Markers were enabled, but user description does not contain markers. skipping AI prediction") + get_logger().info("Markers were enabled, but user description does not contain markers. skipping AI prediction") return None large_pr_handling = get_settings().pr_description.enable_large_pr_handling and "pr_description_only_files_prompts" in get_settings() - output = get_pr_diff(self.git_provider, self.token_handler, model, large_pr_handling=large_pr_handling, - return_remaining_files=True) + output = get_pr_diff(self.git_provider, self.token_handler, model, large_pr_handling=large_pr_handling, return_remaining_files=True) if isinstance(output, tuple): patches_diff, remaining_files_list = output else: patches_diff = output remaining_files_list = [] + if not large_pr_handling or patches_diff: self.patches_diff = patches_diff if patches_diff: + # generate the prediction get_logger().debug(f"PR diff", artifact=self.patches_diff) self.prediction = await self._get_prediction(model, patches_diff, prompt="pr_description_prompt") - if (remaining_files_list and 'pr_files' in self.prediction and 'label:' in self.prediction and - get_settings().pr_description.mention_extra_files): - get_logger().debug(f"Extending additional files, {len(remaining_files_list)} files") - self.prediction = await self.extend_additional_files(remaining_files_list) + + # extend the prediction with additional files not shown + self.prediction = await self.extend_uncovered_files(self.prediction) else: - get_logger().error(f"Error getting PR diff {self.pr_id}") + get_logger().error(f"Error getting PR diff {self.pr_id}", + artifact={"traceback": traceback.format_exc()}) self.prediction = None else: # get the diff in multiple patches, with the token handler only for the files prompt @@ -289,43 +295,81 @@ class PRDescription: prompt="pr_description_only_description_prompts") prediction_headers = prediction_headers.strip().removeprefix('```yaml').strip('`').strip() - # manually add extra files to final prediction - MAX_EXTRA_FILES_TO_OUTPUT = 100 - if get_settings().pr_description.mention_extra_files: - for i, file in enumerate(remaining_files_list): - extra_file_yaml = f"""\ -- filename: | - {file} - changes_summary: | - ... - changes_title: | - ... - label: | - additional files (token-limit) -""" - files_walkthrough = files_walkthrough.strip() + "\n" + extra_file_yaml.strip() - if i >= MAX_EXTRA_FILES_TO_OUTPUT: - files_walkthrough += f"""\ -extra_file_yaml = -- filename: | - Additional {len(remaining_files_list) - MAX_EXTRA_FILES_TO_OUTPUT} files not shown - changes_summary: | - ... - changes_title: | - ... - label: | - additional files (token-limit) -""" - break + # extend the tables with the files not shown + files_walkthrough_extended = await self.extend_uncovered_files(files_walkthrough) # final processing - self.prediction = prediction_headers + "\n" + "pr_files:\n" + files_walkthrough + self.prediction = prediction_headers + "\n" + "pr_files:\n" + files_walkthrough_extended if not load_yaml(self.prediction, keys_fix_yaml=self.keys_fix): get_logger().error(f"Error getting valid YAML in large PR handling for describe {self.pr_id}") if load_yaml(prediction_headers, keys_fix_yaml=self.keys_fix): get_logger().debug(f"Using only headers for describe {self.pr_id}") self.prediction = prediction_headers + async def extend_uncovered_files(self, original_prediction: str) -> str: + try: + prediction = original_prediction + + # get the original prediction filenames + original_prediction_loaded = load_yaml(original_prediction, keys_fix_yaml=self.keys_fix) + if isinstance(original_prediction_loaded, list): + original_prediction_dict = {"pr_files": original_prediction_loaded} + else: + original_prediction_dict = original_prediction_loaded + filenames_predicted = [file['filename'].strip() for file in original_prediction_dict.get('pr_files', [])] + + # extend the prediction with additional files not included in the original prediction + pr_files = self.git_provider.get_diff_files() + prediction_extra = "pr_files:" + MAX_EXTRA_FILES_TO_OUTPUT = 100 + counter_extra_files = 0 + for file in pr_files: + if file.filename in filenames_predicted: + continue + + # add up to MAX_EXTRA_FILES_TO_OUTPUT files + counter_extra_files += 1 + if counter_extra_files > MAX_EXTRA_FILES_TO_OUTPUT: + extra_file_yaml = f"""\ +- filename: | + Additional files not shown + changes_title: | + ... + label: | + additional files +""" + prediction_extra = prediction_extra + "\n" + extra_file_yaml.strip() + get_logger().debug(f"Too many remaining files, clipping to {MAX_EXTRA_FILES_TO_OUTPUT}") + break + + extra_file_yaml = f"""\ +- filename: | + {file.filename} + changes_title: | + ... + label: | + additional files +""" + prediction_extra = prediction_extra + "\n" + extra_file_yaml.strip() + + # merge the two dictionaries + if counter_extra_files > 0: + get_logger().info(f"Adding {counter_extra_files} unprocessed extra files to table prediction") + prediction_extra_dict = load_yaml(prediction_extra, keys_fix_yaml=self.keys_fix) + if isinstance(original_prediction_dict, dict) and isinstance(prediction_extra_dict, dict): + original_prediction_dict["pr_files"].extend(prediction_extra_dict["pr_files"]) + new_yaml = yaml.dump(original_prediction_dict) + if load_yaml(new_yaml, keys_fix_yaml=self.keys_fix): + prediction = new_yaml + if isinstance(original_prediction, list): + prediction = yaml.dump(original_prediction_dict["pr_files"]) + + return prediction + except Exception as e: + get_logger().error(f"Error extending uncovered files {self.pr_id}: {e}") + return original_prediction + + async def extend_additional_files(self, remaining_files_list) -> str: prediction = self.prediction try: @@ -397,31 +441,31 @@ extra_file_yaml = self.data['pr_files'] = self.data.pop('pr_files') def _prepare_labels(self) -> List[str]: - pr_types = [] + pr_labels = [] # If the 'PR Type' key is present in the dictionary, split its value by comma and assign it to 'pr_types' - if 'labels' in self.data: + if 'labels' in self.data and self.data['labels']: if type(self.data['labels']) == list: - pr_types = self.data['labels'] + pr_labels = self.data['labels'] elif type(self.data['labels']) == str: - pr_types = self.data['labels'].split(',') - elif 'type' in self.data: + pr_labels = self.data['labels'].split(',') + elif 'type' in self.data and self.data['type'] and get_settings().pr_description.publish_labels: if type(self.data['type']) == list: - pr_types = self.data['type'] + pr_labels = self.data['type'] elif type(self.data['type']) == str: - pr_types = self.data['type'].split(',') - pr_types = [label.strip() for label in pr_types] + pr_labels = self.data['type'].split(',') + pr_labels = [label.strip() for label in pr_labels] # convert lowercase labels to original case try: if "labels_minimal_to_labels_dict" in self.variables: d: dict = self.variables["labels_minimal_to_labels_dict"] - for i, label_i in enumerate(pr_types): + for i, label_i in enumerate(pr_labels): if label_i in d: - pr_types[i] = d[label_i] + pr_labels[i] = d[label_i] except Exception as e: get_logger().error(f"Error converting labels to original case {self.pr_id}: {e}") - return pr_types + return pr_labels def _prepare_pr_answer_with_markers(self) -> Tuple[str, str, str, List[dict]]: get_logger().info(f"Using description marker replacements {self.pr_id}") @@ -528,14 +572,18 @@ extra_file_yaml = return file_label_dict for file in self.data['pr_files']: try: - required_fields = ['changes_summary', 'changes_title', 'filename', 'label'] + required_fields = ['changes_title', 'filename', 'label'] if not all(field in file for field in required_fields): # can happen for example if a YAML generation was interrupted in the middle (no more tokens) get_logger().warning(f"Missing required fields in file label dict {self.pr_id}, skipping file", artifact={"file": file}) continue + if not file.get('changes_title'): + get_logger().warning(f"Empty changes title or summary in file label dict {self.pr_id}, skipping file", + artifact={"file": file}) + continue filename = file['filename'].replace("'", "`").replace('"', '`') - changes_summary = file['changes_summary'] + changes_summary = file.get('changes_summary', "").strip() changes_title = file['changes_title'].strip() label = file.get('label').strip().lower() if label not in file_label_dict: @@ -578,12 +626,14 @@ extra_file_yaml = for filename, file_changes_title, file_change_description in list_tuples: filename = filename.replace("'", "`").rstrip() filename_publish = filename.split("/")[-1] - - file_changes_title_code = f"{file_changes_title}" - file_changes_title_code_br = insert_br_after_x_chars(file_changes_title_code, x=(delta - 5)).strip() - if len(file_changes_title_code_br) < (delta - 5): - file_changes_title_code_br += "  " * ((delta - 5) - len(file_changes_title_code_br)) - filename_publish = f"{filename_publish}
{file_changes_title_code_br}
" + if file_changes_title and file_changes_title.strip() != "...": + file_changes_title_code = f"{file_changes_title}" + file_changes_title_code_br = insert_br_after_x_chars(file_changes_title_code, x=(delta - 5)).strip() + if len(file_changes_title_code_br) < (delta - 5): + file_changes_title_code_br += "  " * ((delta - 5) - len(file_changes_title_code_br)) + filename_publish = f"{filename_publish}
{file_changes_title_code_br}
" + else: + filename_publish = f"{filename_publish}" diff_plus_minus = "" delta_nbsp = "" diff_files = self.git_provider.get_diff_files() @@ -592,6 +642,8 @@ extra_file_yaml = num_plus_lines = f.num_plus_lines num_minus_lines = f.num_minus_lines diff_plus_minus += f"+{num_plus_lines}/-{num_minus_lines}" + if len(diff_plus_minus) > 12 or diff_plus_minus == "+0/-0": + diff_plus_minus = "[link]" delta_nbsp = "  " * max(0, (8 - len(diff_plus_minus))) break @@ -600,9 +652,40 @@ extra_file_yaml = if hasattr(self.git_provider, 'get_line_link'): filename = filename.strip() link = self.git_provider.get_line_link(filename, relevant_line_start=-1) + if (not link or not diff_plus_minus) and ('additional files' not in filename.lower()): + get_logger().warning(f"Error getting line link for '{filename}'") + continue + # Add file data to the PR body file_change_description_br = insert_br_after_x_chars(file_change_description, x=(delta - 5)) - pr_body += f""" + pr_body = self.add_file_data(delta_nbsp, diff_plus_minus, file_change_description_br, filename, + filename_publish, link, pr_body) + + # Close the collapsible file list + if use_collapsible_file_list: + pr_body += """""" + else: + pr_body += """""" + pr_body += """""" + + except Exception as e: + get_logger().error(f"Error processing pr files to markdown {self.pr_id}: {str(e)}") + pass + return pr_body, pr_comments + + def add_file_data(self, delta_nbsp, diff_plus_minus, file_change_description_br, filename, filename_publish, link, + pr_body) -> str: + + if not file_change_description_br: + pr_body += f""" + + {filename_publish} + {diff_plus_minus}{delta_nbsp} + + +""" + else: + pr_body += f"""
@@ -622,17 +705,7 @@ extra_file_yaml = """ - if use_collapsible_file_list: - pr_body += """
""" - else: - pr_body += """""" - pr_body += """""" - - except Exception as e: - get_logger().error(f"Error processing pr files to markdown {self.pr_id}: {e}") - pass - return pr_body, pr_comments - + return pr_body def count_chars_without_html(string): if '<' not in string: @@ -641,11 +714,14 @@ def count_chars_without_html(string): return len(no_html_string) -def insert_br_after_x_chars(text, x=70): +def insert_br_after_x_chars(text: str, x=70): """ Insert
into a string after a word that increases its length above x characters. Use proper HTML tags for code and new lines. """ + + if not text: + return "" if count_chars_without_html(text) < x: return text