From 754d47f1875a4839abbc6a98858886ca33b937e4 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Fri, 18 Jul 2025 08:51:48 +0300 Subject: [PATCH 1/5] refactor(pr_description): redesign changes walkthrough section and improve file processing --- pr_agent/algo/utils.py | 5 +- .../git_providers/azuredevops_provider.py | 2 +- pr_agent/settings/configuration.toml | 2 +- pr_agent/tools/pr_description.py | 60 +++++++++++++------ 4 files changed, 47 insertions(+), 22 deletions(-) diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 00d52a14..eaa8ce47 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -70,7 +70,8 @@ class ReasoningEffort(str, Enum): class PRDescriptionHeader(str, Enum): - CHANGES_WALKTHROUGH = "### **Changes walkthrough** 📝" + DIAGRAM_WALKTHROUGH = "Diagram Walkthrough" + FILE_WALKTHROUGH = "File Walkthrough" def get_setting(key: str) -> Any: @@ -1284,7 +1285,7 @@ def process_description(description_full: str) -> Tuple[str, List]: if not description_full: return "", [] - description_split = description_full.split(PRDescriptionHeader.CHANGES_WALKTHROUGH.value) + description_split = description_full.split(PRDescriptionHeader.FILE_WALKTHROUGH.value) base_description_str = description_split[0] changes_walkthrough_str = "" files = [] diff --git a/pr_agent/git_providers/azuredevops_provider.py b/pr_agent/git_providers/azuredevops_provider.py index 283f28c7..cba81654 100644 --- a/pr_agent/git_providers/azuredevops_provider.py +++ b/pr_agent/git_providers/azuredevops_provider.py @@ -380,7 +380,7 @@ class AzureDevopsProvider(GitProvider): pr_body = pr_body[:ind] if len(pr_body) > MAX_PR_DESCRIPTION_AZURE_LENGTH: - changes_walkthrough_text = PRDescriptionHeader.CHANGES_WALKTHROUGH.value + changes_walkthrough_text = PRDescriptionHeader.FILE_WALKTHROUGH.value ind = pr_body.find(changes_walkthrough_text) if ind != -1: pr_body = pr_body[:ind] diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index e75cfa56..3613d136 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -106,7 +106,7 @@ extra_instructions = "" enable_pr_type=true final_update_message = true enable_help_text=false -enable_help_comment=true +enable_help_comment=false enable_pr_diagram=true # adds a section with a diagram of the PR changes # describe as comment publish_description_as_comment=false diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py index f0446b05..c9ecdbe2 100644 --- a/pr_agent/tools/pr_description.py +++ b/pr_agent/tools/pr_description.py @@ -128,7 +128,7 @@ class PRDescription: pr_title, pr_body, changes_walkthrough, pr_file_changes = self._prepare_pr_answer() if not self.git_provider.is_supported( "publish_file_comments") or not get_settings().pr_description.inline_file_summary: - pr_body += "\n\n" + changes_walkthrough + pr_body += "\n\n" + changes_walkthrough + "___\n\n" get_logger().debug("PR output", artifact={"title": pr_title, "body": pr_body}) # Add help text if gfm_markdown is supported @@ -331,7 +331,8 @@ class PRDescription: else: original_prediction_dict = original_prediction_loaded if original_prediction_dict: - filenames_predicted = [file.get('filename', '').strip() for file in original_prediction_dict.get('pr_files', [])] + files = original_prediction_dict.get('pr_files', []) + filenames_predicted = [file.get('filename', '').strip() for file in files if isinstance(file, dict)] else: filenames_predicted = [] @@ -555,15 +556,11 @@ class PRDescription: """ # Iterate over the dictionary items and append the key and value to 'markdown_text' in a markdown format - markdown_text = "" # Don't display 'PR Labels' if 'labels' in self.data and self.git_provider.is_supported("get_labels"): self.data.pop('labels') if not get_settings().pr_description.enable_pr_type: self.data.pop('type') - for key, value in self.data.items(): - markdown_text += f"## **{key}**\n\n" - markdown_text += f"{value}\n\n" # Remove the 'PR Title' key from the dictionary ai_title = self.data.pop('title', self.vars["title"]) @@ -579,6 +576,10 @@ class PRDescription: pr_body, changes_walkthrough = "", "" pr_file_changes = [] for idx, (key, value) in enumerate(self.data.items()): + if key == 'changes_diagram': + pr_body += f"### {PRDescriptionHeader.DIAGRAM_WALKTHROUGH.value}\n\n" + pr_body += f"{value}\n\n" + continue if key == 'pr_files': value = self.file_label_dict else: @@ -597,9 +598,15 @@ class PRDescription: pr_body += f'- `{filename}`: {description}\n' if self.git_provider.is_supported("gfm_markdown"): pr_body += "\n" - elif 'pr_files' in key.lower() and get_settings().pr_description.enable_semantic_files_types: - changes_walkthrough, pr_file_changes = self.process_pr_files_prediction(changes_walkthrough, value) - changes_walkthrough = f"{PRDescriptionHeader.CHANGES_WALKTHROUGH.value}\n{changes_walkthrough}" + elif 'pr_files' in key.lower() and get_settings().pr_description.enable_semantic_files_types: # 'File Walkthrough' section + changes_walkthrough_table, pr_file_changes = self.process_pr_files_prediction(changes_walkthrough, value) + if get_settings().pr_description.get('file_table_collapsible_open_by_default', False): + initial_status = " open" + else: + initial_status = "" + changes_walkthrough = f"

{PRDescriptionHeader.FILE_WALKTHROUGH.value}

\n\n" + changes_walkthrough += f"{changes_walkthrough_table}\n\n" + changes_walkthrough += "\n\n" elif key.lower().strip() == 'description': if isinstance(value, list): value = ', '.join(v.rstrip() for v in value) @@ -633,14 +640,18 @@ class PRDescription: artifact={"file": file}) continue filename = file['filename'].replace("'", "`").replace('"', '`') - changes_summary = file.get('changes_summary', "").strip() + changes_summary = file.get('changes_summary', "") + if not changes_summary: + get_logger().warning(f"Empty changes summary in file label dict, skipping file", + artifact={"file": file}) + changes_summary = changes_summary.strip() changes_title = file['changes_title'].strip() label = file.get('label').strip().lower() if label not in file_label_dict: file_label_dict[label] = [] file_label_dict[label].append((filename, changes_title, changes_summary)) except Exception as e: - get_logger().error(f"Error preparing file label dict {self.pr_id}: {e}") + get_logger().exception(f"Error preparing file label dict {self.pr_id}") pass return file_label_dict @@ -720,7 +731,7 @@ class PRDescription: pr_body += """""" except Exception as e: - get_logger().error(f"Error processing PR files to markdown {self.pr_id}: {str(e)}") + get_logger().error(f"Error processing pr files to markdown {self.pr_id}: {str(e)}") pass return pr_body, pr_comments @@ -776,14 +787,21 @@ def insert_br_after_x_chars(text: str, x=70): if count_chars_without_html(text) < x: return text + is_list = text.lstrip().startswith(("- ", "* ")) + # replace odd instances of ` with and even instances of ` with text = replace_code_tags(text) - # convert list items to
  • - if text.startswith("- ") or text.startswith("* "): - text = "
  • " + text[2:] - text = text.replace("\n- ", '
  • ').replace("\n - ", '
  • ') - text = text.replace("\n* ", '
  • ').replace("\n * ", '
  • ') + # convert list items to
  • only if the text is identified as a list + if is_list: + # To handle lists that start with indentation + leading_whitespace = text[:len(text) - len(text.lstrip())] + body = text.lstrip() + body = "
  • " + body[2:] + text = leading_whitespace + body + + text = text.replace("\n- ", '
  • ').replace("\n - ", '
  • ') + text = text.replace("\n* ", '
  • ').replace("\n * ", '
  • ') # convert new lines to
    text = text.replace("\n", '
    ') @@ -823,7 +841,13 @@ def insert_br_after_x_chars(text: str, x=70): is_inside_code = True if "" in word: is_inside_code = False - return ''.join(new_text).strip() + + processed_text = ''.join(new_text).strip() + + if is_list: + processed_text = f"
      {processed_text}
    " + + return processed_text def replace_code_tags(text): From e8c73e7baaf979047405814445a36503fd2a7ae2 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Fri, 18 Jul 2025 08:54:52 +0300 Subject: [PATCH 2/5] refactor(utils): improve file walkthrough parsing with regex and better error handling --- pr_agent/algo/utils.py | 43 +++++++--- tests/unittest/test_convert_to_markdown.py | 95 +++++++++++----------- 2 files changed, 80 insertions(+), 58 deletions(-) diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index eaa8ce47..9b924554 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -1285,14 +1285,35 @@ def process_description(description_full: str) -> Tuple[str, List]: if not description_full: return "", [] - description_split = description_full.split(PRDescriptionHeader.FILE_WALKTHROUGH.value) - base_description_str = description_split[0] - changes_walkthrough_str = "" - files = [] - if len(description_split) > 1: - changes_walkthrough_str = description_split[1] + # description_split = description_full.split(PRDescriptionHeader.FILE_WALKTHROUGH.value) + if PRDescriptionHeader.FILE_WALKTHROUGH.value in description_full: + try: + # FILE_WALKTHROUGH are presented in a collapsible section in the description + regex_pattern = r'\s*\s*

    \s*' + re.escape(PRDescriptionHeader.FILE_WALKTHROUGH.value) + r'\s*

    \s*
    ' + description_split = re.split(regex_pattern, description_full, maxsplit=1, flags=re.DOTALL) + + # If the regex pattern is not found, fallback to the previous method + if len(description_split) == 1: + get_logger().debug("Could not find regex pattern for file walkthrough, falling back to simple split") + description_split = description_full.split(PRDescriptionHeader.FILE_WALKTHROUGH.value, 1) + except Exception as e: + get_logger().warning(f"Failed to split description using regex, falling back to simple split: {e}") + description_split = description_full.split(PRDescriptionHeader.FILE_WALKTHROUGH.value, 1) + + if len(description_split) < 2: + get_logger().error("Failed to split description into base and changes walkthrough", artifact={'description': description_full}) + return description_full.strip(), [] + + base_description_str = description_split[0].strip() + changes_walkthrough_str = "" + files = [] + if len(description_split) > 1: + changes_walkthrough_str = description_split[1] + else: + get_logger().debug("No changes walkthrough found") else: - get_logger().debug("No changes walkthrough found") + base_description_str = description_full.strip() + return base_description_str, [] try: if changes_walkthrough_str: @@ -1315,18 +1336,20 @@ def process_description(description_full: str) -> Tuple[str, List]: try: if isinstance(file_data, tuple): file_data = file_data[0] - pattern = r'
    \s*(.*?)\s*
    (.*?).*?
    \s*
    \s*(.*?)\s*
  • (.*?)
  • ' + pattern = r'
    \s*(.*?)\s*
    (.*?).*?
    \s*
    \s*(.*?)\s*(?:
  • |•)(.*?)
  • ' res = re.search(pattern, file_data, re.DOTALL) if not res or res.lastindex != 4: pattern_back = r'
    \s*(.*?)
    (.*?).*?
    \s*
    \s*(.*?)\n\n\s*(.*?)
    ' res = re.search(pattern_back, file_data, re.DOTALL) if not res or res.lastindex != 4: - pattern_back = r'
    \s*(.*?)\s*
    (.*?).*?
    \s*
    \s*(.*?)\s*-\s*(.*?)\s*
    ' # looking for hyphen ('- ') + pattern_back = r'
    \s*(.*?)\s*
    (.*?).*?
    \s*
    \s*(.*?)\s*-\s*(.*?)\s*
    ' # looking for hypen ('- ') res = re.search(pattern_back, file_data, re.DOTALL) if res and res.lastindex == 4: short_filename = res.group(1).strip() short_summary = res.group(2).strip() long_filename = res.group(3).strip() + if long_filename.endswith('
      '): + long_filename = long_filename[:-4].strip() long_summary = res.group(4).strip() long_summary = long_summary.replace('
      *', '\n*').replace('
      ','').replace('\n','
      ') long_summary = h.handle(long_summary).strip() @@ -1345,7 +1368,7 @@ def process_description(description_full: str) -> Tuple[str, List]: if '...' in file_data: pass # PR with many files. some did not get analyzed else: - get_logger().error(f"Failed to parse description", artifact={'description': file_data}) + get_logger().warning(f"Failed to parse description", artifact={'description': file_data}) except Exception as e: get_logger().exception(f"Failed to process description: {e}", artifact={'description': file_data}) diff --git a/tests/unittest/test_convert_to_markdown.py b/tests/unittest/test_convert_to_markdown.py index 84e01942..187ea4a8 100644 --- a/tests/unittest/test_convert_to_markdown.py +++ b/tests/unittest/test_convert_to_markdown.py @@ -51,7 +51,7 @@ class TestConvertToMarkdown: input_data = {'review': { 'estimated_effort_to_review_[1-5]': '1, because the changes are minimal and straightforward, focusing on a single functionality addition.\n', 'relevant_tests': 'No\n', 'possible_issues': 'No\n', 'security_concerns': 'No\n'}} - + expected_output = textwrap.dedent(f"""\ {PRReviewHeader.REGULAR.value} 🔍 @@ -67,12 +67,12 @@ class TestConvertToMarkdown: """) assert convert_to_markdown_v2(input_data).strip() == expected_output.strip() - + def test_simple_dictionary_input_without_gfm_supported(self): input_data = {'review': { 'estimated_effort_to_review_[1-5]': '1, because the changes are minimal and straightforward, focusing on a single functionality addition.\n', 'relevant_tests': 'No\n', 'possible_issues': 'No\n', 'security_concerns': 'No\n'}} - + expected_output = textwrap.dedent("""\ ## PR Reviewer Guide 🔍 @@ -89,74 +89,74 @@ class TestConvertToMarkdown: """) assert convert_to_markdown_v2(input_data, gfm_supported=False).strip() == expected_output.strip() - + def test_key_issues_to_review(self): input_data = {'review': { 'key_issues_to_review': [ { - 'relevant_file' : 'src/utils.py', - 'issue_header' : 'Code Smell', - 'issue_content' : 'The function is too long and complex.', + 'relevant_file': 'src/utils.py', + 'issue_header': 'Code Smell', + 'issue_content': 'The function is too long and complex.', 'start_line': 30, 'end_line': 50, } ] }} mock_git_provider = Mock() - reference_link = 'https://github.com/qodo/pr-agent/pull/1/files#diff-hashvalue-R174' + reference_link = 'https://github.com/qodo/pr-agent/pull/1/files#diff-hashvalue-R174' mock_git_provider.get_line_link.return_value = reference_link expected_output = textwrap.dedent(f"""\ ## PR Reviewer Guide 🔍 - + Here are some key observations to aid the review process: - +
      ⚡ Recommended focus areas for review

      - + Code Smell
      The function is too long and complex. - +
      """) - + assert convert_to_markdown_v2(input_data, git_provider=mock_git_provider).strip() == expected_output.strip() mock_git_provider.get_line_link.assert_called_with('src/utils.py', 30, 50) - + def test_ticket_compliance(self): input_data = {'review': { 'ticket_compliance_check': [ { 'ticket_url': 'https://example.com/ticket/123', - 'ticket_requirements': '- Requirement 1\n- Requirement 2\n', - 'fully_compliant_requirements': '- Requirement 1\n- Requirement 2\n', + 'ticket_requirements': '- Requirement 1\n- Requirement 2\n', + 'fully_compliant_requirements': '- Requirement 1\n- Requirement 2\n', 'not_compliant_requirements': '', 'requires_further_human_verification': '', } ] }} - + expected_output = textwrap.dedent("""\ ## PR Reviewer Guide 🔍 - + Here are some key observations to aid the review process: - +
      - + **🎫 Ticket compliance analysis ✅** - - - + + + **[123](https://example.com/ticket/123) - Fully compliant** - + Compliant requirements: - + - Requirement 1 - Requirement 2 - - - + + +
      """) @@ -179,43 +179,43 @@ class TestConvertToMarkdown: ], 'title': 'Bug Fix', } - ] - } + ] + } } expected_output = textwrap.dedent("""\ ## PR Reviewer Guide 🔍 - + Here are some key observations to aid the review process:
      🔀 Multiple PR themes

      - +
      Sub-PR theme: Refactoring - + ___ - + Relevant files: - + - src/file1.py - src/file2.py ___ - +
      - +
      Sub-PR theme: Bug Fix - + ___ - + Relevant files: - + - src/file3.py ___ - +
      - +
      """) @@ -228,7 +228,6 @@ class TestConvertToMarkdown: expected_output = '' - assert convert_to_markdown_v2(input_data).strip() == expected_output.strip() def test_dictionary_with_empty_dictionaries(self): @@ -236,16 +235,16 @@ class TestConvertToMarkdown: expected_output = '' - assert convert_to_markdown_v2(input_data).strip() == expected_output.strip() + class TestBR: def test_br1(self): file_change_description = '- Imported `FilePatchInfo` and `EDIT_TYPE` from `pr_agent.algo.types` instead of `pr_agent.git_providers.git_provider`.' file_change_description_br = insert_br_after_x_chars(file_change_description) - expected_output = ('
    • Imported FilePatchInfo and EDIT_TYPE from ' + expected_output = ('
      • Imported FilePatchInfo and EDIT_TYPE from ' 'pr_agent.algo.types instead
        of ' - 'pr_agent.git_providers.git_provider.') + 'pr_agent.git_providers.git_provider.
      ') assert file_change_description_br == expected_output # print("-----") # print(file_change_description_br) @@ -255,9 +254,9 @@ class TestBR: '- Created a - new -class `ColorPaletteResourcesCollection ColorPaletteResourcesCollection ' 'ColorPaletteResourcesCollection ColorPaletteResourcesCollection`') file_change_description_br = insert_br_after_x_chars(file_change_description) - expected_output = ('
    • Created a - new -class ColorPaletteResourcesCollection
      ' + expected_output = ('
      • Created a - new -class ColorPaletteResourcesCollection
        ' 'ColorPaletteResourcesCollection ColorPaletteResourcesCollection ' - '
        ColorPaletteResourcesCollection') + '

        ColorPaletteResourcesCollection
      ') assert file_change_description_br == expected_output # print("-----") # print(file_change_description_br) From 6179eeca58d2966c11723dc8b00c313e167aab9c Mon Sep 17 00:00:00 2001 From: mrT23 Date: Fri, 18 Jul 2025 09:14:52 +0300 Subject: [PATCH 3/5] fix(pr_description): fix template syntax errors and improve formatting in prompts --- docs/docs/tools/describe.md | 3 +++ pr_agent/settings/pr_description_prompts.toml | 24 +++++++++---------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/docs/docs/tools/describe.md b/docs/docs/tools/describe.md index d929679c..74408918 100644 --- a/docs/docs/tools/describe.md +++ b/docs/docs/tools/describe.md @@ -124,6 +124,9 @@ This option is enabled by default via the `pr_description.enable_pr_diagram` par enable_semantic_files_types If set to true, "Changes walkthrough" section will be generated. Default is true. + + file_table_collapsible_open_by_default + If set to true, the file list in the "Changes walkthrough" section will be open by default. If set to false, it will be closed by default. Default is false. collapsible_file_list If set to true, the file list in the "Changes walkthrough" section will be collapsible. If set to "adaptive", the file list will be collapsible only if there are more than 8 files. Default is "adaptive". diff --git a/pr_agent/settings/pr_description_prompts.toml b/pr_agent/settings/pr_description_prompts.toml index cc789c94..ba51e1ec 100644 --- a/pr_agent/settings/pr_description_prompts.toml +++ b/pr_agent/settings/pr_description_prompts.toml @@ -48,8 +48,8 @@ class PRDescription(BaseModel): description: str = Field(description="summarize the PR changes in up to four bullet points, each up to 8 words. For large PRs, add sub-bullets if needed. Order bullets by importance, with each bullet highlighting a key change group.") title: str = Field(description="a concise and descriptive title that captures the PR's main theme") {%- if enable_pr_diagram %} - changes_diagram: str = Field(description="a horizontal diagram that represents the main PR changes, in the format of a valid mermaid LR flowchart. The diagram should be concise and easy to read. Leave empty if no diagram is relevant. To create robust Mermaid diagrams, follow this two-step process: (1) Declare the nodes: nodeID[\"node description\"]. (2) Then define the links: nodeID1 -- \"link text\" --> nodeID2. Node description must always be surrounded with quotation marks.") -{%- endif %} + changes_diagram: str = Field(description='a horizontal diagram that represents the main PR changes, in the format of a valid mermaid LR flowchart. The diagram should be concise and easy to read. Leave empty if no diagram is relevant. To create robust Mermaid diagrams, follow this two-step process: (1) Declare the nodes: nodeID["node description"]. (2) Then define the links: nodeID1 -- "link text" --> nodeID2. Node description must always be surrounded with double quotation marks') +'{%- endif %} {%- if enable_semantic_files_types %} pr_files: List[FileDescription] = Field(max_items=20, description="a list of all the files that were changed in the PR, and summary of their changes. Each file must be analyzed regardless of change size.") {%- endif %} @@ -67,11 +67,11 @@ description: | title: | ... {%- if enable_pr_diagram %} - changes_diagram: | - ```mermaid - flowchart LR - ... - ``` +changes_diagram: | + ```mermaid + flowchart LR + ... + ``` {%- endif %} {%- if enable_semantic_files_types %} pr_files: @@ -155,11 +155,11 @@ description: | title: | ... {%- if enable_pr_diagram %} - changes_diagram: | - ```mermaid - flowchart LR - ... - ``` +changes_diagram: | + ```mermaid + flowchart LR + ... + ``` {%- endif %} {%- if enable_semantic_files_types %} pr_files: From 8906a81a2e346d0275f332ad64d0b48d52bf4329 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Fri, 18 Jul 2025 09:17:14 +0300 Subject: [PATCH 4/5] fix(docs): fix table row structure in describe tool documentation --- docs/docs/tools/describe.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/docs/tools/describe.md b/docs/docs/tools/describe.md index 74408918..1ade6c56 100644 --- a/docs/docs/tools/describe.md +++ b/docs/docs/tools/describe.md @@ -124,9 +124,10 @@ This option is enabled by default via the `pr_description.enable_pr_diagram` par enable_semantic_files_types If set to true, "Changes walkthrough" section will be generated. Default is true. - + file_table_collapsible_open_by_default If set to true, the file list in the "Changes walkthrough" section will be open by default. If set to false, it will be closed by default. Default is false. + collapsible_file_list If set to true, the file list in the "Changes walkthrough" section will be collapsible. If set to "adaptive", the file list will be collapsible only if there are more than 8 files. Default is "adaptive". From 7cc4206b702821c1687ac41275e26764cd425959 Mon Sep 17 00:00:00 2001 From: Tal Date: Fri, 18 Jul 2025 09:20:41 +0300 Subject: [PATCH 5/5] Update pr_agent/tools/pr_description.py Co-authored-by: qodo-merge-for-open-source[bot] <189517486+qodo-merge-for-open-source[bot]@users.noreply.github.com> --- pr_agent/tools/pr_description.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py index c9ecdbe2..5abea388 100644 --- a/pr_agent/tools/pr_description.py +++ b/pr_agent/tools/pr_description.py @@ -644,6 +644,7 @@ class PRDescription: if not changes_summary: get_logger().warning(f"Empty changes summary in file label dict, skipping file", artifact={"file": file}) + continue changes_summary = changes_summary.strip() changes_title = file['changes_title'].strip() label = file.get('label').strip().lower()