mirror of
https://github.com/qodo-ai/pr-agent.git
synced 2025-07-19 12:00:37 +08:00
refactor(utils): improve file walkthrough parsing with regex and better error handling
This commit is contained in:
@ -1285,14 +1285,35 @@ def process_description(description_full: str) -> Tuple[str, List]:
|
|||||||
if not description_full:
|
if not description_full:
|
||||||
return "", []
|
return "", []
|
||||||
|
|
||||||
description_split = description_full.split(PRDescriptionHeader.FILE_WALKTHROUGH.value)
|
# description_split = description_full.split(PRDescriptionHeader.FILE_WALKTHROUGH.value)
|
||||||
base_description_str = description_split[0]
|
if PRDescriptionHeader.FILE_WALKTHROUGH.value in description_full:
|
||||||
changes_walkthrough_str = ""
|
try:
|
||||||
files = []
|
# FILE_WALKTHROUGH are presented in a collapsible section in the description
|
||||||
if len(description_split) > 1:
|
regex_pattern = r'<details.*?>\s*<summary>\s*<h3>\s*' + re.escape(PRDescriptionHeader.FILE_WALKTHROUGH.value) + r'\s*</h3>\s*</summary>'
|
||||||
changes_walkthrough_str = description_split[1]
|
description_split = re.split(regex_pattern, description_full, maxsplit=1, flags=re.DOTALL)
|
||||||
|
|
||||||
|
# If the regex pattern is not found, fallback to the previous method
|
||||||
|
if len(description_split) == 1:
|
||||||
|
get_logger().debug("Could not find regex pattern for file walkthrough, falling back to simple split")
|
||||||
|
description_split = description_full.split(PRDescriptionHeader.FILE_WALKTHROUGH.value, 1)
|
||||||
|
except Exception as e:
|
||||||
|
get_logger().warning(f"Failed to split description using regex, falling back to simple split: {e}")
|
||||||
|
description_split = description_full.split(PRDescriptionHeader.FILE_WALKTHROUGH.value, 1)
|
||||||
|
|
||||||
|
if len(description_split) < 2:
|
||||||
|
get_logger().error("Failed to split description into base and changes walkthrough", artifact={'description': description_full})
|
||||||
|
return description_full.strip(), []
|
||||||
|
|
||||||
|
base_description_str = description_split[0].strip()
|
||||||
|
changes_walkthrough_str = ""
|
||||||
|
files = []
|
||||||
|
if len(description_split) > 1:
|
||||||
|
changes_walkthrough_str = description_split[1]
|
||||||
|
else:
|
||||||
|
get_logger().debug("No changes walkthrough found")
|
||||||
else:
|
else:
|
||||||
get_logger().debug("No changes walkthrough found")
|
base_description_str = description_full.strip()
|
||||||
|
return base_description_str, []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if changes_walkthrough_str:
|
if changes_walkthrough_str:
|
||||||
@ -1315,18 +1336,20 @@ def process_description(description_full: str) -> Tuple[str, List]:
|
|||||||
try:
|
try:
|
||||||
if isinstance(file_data, tuple):
|
if isinstance(file_data, tuple):
|
||||||
file_data = file_data[0]
|
file_data = file_data[0]
|
||||||
pattern = r'<details>\s*<summary><strong>(.*?)</strong>\s*<dd><code>(.*?)</code>.*?</summary>\s*<hr>\s*(.*?)\s*<li>(.*?)</details>'
|
pattern = r'<details>\s*<summary><strong>(.*?)</strong>\s*<dd><code>(.*?)</code>.*?</summary>\s*<hr>\s*(.*?)\s*(?:<li>|•)(.*?)</details>'
|
||||||
res = re.search(pattern, file_data, re.DOTALL)
|
res = re.search(pattern, file_data, re.DOTALL)
|
||||||
if not res or res.lastindex != 4:
|
if not res or res.lastindex != 4:
|
||||||
pattern_back = r'<details>\s*<summary><strong>(.*?)</strong><dd><code>(.*?)</code>.*?</summary>\s*<hr>\s*(.*?)\n\n\s*(.*?)</details>'
|
pattern_back = r'<details>\s*<summary><strong>(.*?)</strong><dd><code>(.*?)</code>.*?</summary>\s*<hr>\s*(.*?)\n\n\s*(.*?)</details>'
|
||||||
res = re.search(pattern_back, file_data, re.DOTALL)
|
res = re.search(pattern_back, file_data, re.DOTALL)
|
||||||
if not res or res.lastindex != 4:
|
if not res or res.lastindex != 4:
|
||||||
pattern_back = r'<details>\s*<summary><strong>(.*?)</strong>\s*<dd><code>(.*?)</code>.*?</summary>\s*<hr>\s*(.*?)\s*-\s*(.*?)\s*</details>' # looking for hyphen ('- ')
|
pattern_back = r'<details>\s*<summary><strong>(.*?)</strong>\s*<dd><code>(.*?)</code>.*?</summary>\s*<hr>\s*(.*?)\s*-\s*(.*?)\s*</details>' # looking for hypen ('- ')
|
||||||
res = re.search(pattern_back, file_data, re.DOTALL)
|
res = re.search(pattern_back, file_data, re.DOTALL)
|
||||||
if res and res.lastindex == 4:
|
if res and res.lastindex == 4:
|
||||||
short_filename = res.group(1).strip()
|
short_filename = res.group(1).strip()
|
||||||
short_summary = res.group(2).strip()
|
short_summary = res.group(2).strip()
|
||||||
long_filename = res.group(3).strip()
|
long_filename = res.group(3).strip()
|
||||||
|
if long_filename.endswith('<ul>'):
|
||||||
|
long_filename = long_filename[:-4].strip()
|
||||||
long_summary = res.group(4).strip()
|
long_summary = res.group(4).strip()
|
||||||
long_summary = long_summary.replace('<br> *', '\n*').replace('<br>','').replace('\n','<br>')
|
long_summary = long_summary.replace('<br> *', '\n*').replace('<br>','').replace('\n','<br>')
|
||||||
long_summary = h.handle(long_summary).strip()
|
long_summary = h.handle(long_summary).strip()
|
||||||
@ -1345,7 +1368,7 @@ def process_description(description_full: str) -> Tuple[str, List]:
|
|||||||
if '<code>...</code>' in file_data:
|
if '<code>...</code>' in file_data:
|
||||||
pass # PR with many files. some did not get analyzed
|
pass # PR with many files. some did not get analyzed
|
||||||
else:
|
else:
|
||||||
get_logger().error(f"Failed to parse description", artifact={'description': file_data})
|
get_logger().warning(f"Failed to parse description", artifact={'description': file_data})
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
get_logger().exception(f"Failed to process description: {e}", artifact={'description': file_data})
|
get_logger().exception(f"Failed to process description: {e}", artifact={'description': file_data})
|
||||||
|
|
||||||
|
@ -51,7 +51,7 @@ class TestConvertToMarkdown:
|
|||||||
input_data = {'review': {
|
input_data = {'review': {
|
||||||
'estimated_effort_to_review_[1-5]': '1, because the changes are minimal and straightforward, focusing on a single functionality addition.\n',
|
'estimated_effort_to_review_[1-5]': '1, because the changes are minimal and straightforward, focusing on a single functionality addition.\n',
|
||||||
'relevant_tests': 'No\n', 'possible_issues': 'No\n', 'security_concerns': 'No\n'}}
|
'relevant_tests': 'No\n', 'possible_issues': 'No\n', 'security_concerns': 'No\n'}}
|
||||||
|
|
||||||
expected_output = textwrap.dedent(f"""\
|
expected_output = textwrap.dedent(f"""\
|
||||||
{PRReviewHeader.REGULAR.value} 🔍
|
{PRReviewHeader.REGULAR.value} 🔍
|
||||||
|
|
||||||
@ -67,12 +67,12 @@ class TestConvertToMarkdown:
|
|||||||
""")
|
""")
|
||||||
|
|
||||||
assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()
|
assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()
|
||||||
|
|
||||||
def test_simple_dictionary_input_without_gfm_supported(self):
|
def test_simple_dictionary_input_without_gfm_supported(self):
|
||||||
input_data = {'review': {
|
input_data = {'review': {
|
||||||
'estimated_effort_to_review_[1-5]': '1, because the changes are minimal and straightforward, focusing on a single functionality addition.\n',
|
'estimated_effort_to_review_[1-5]': '1, because the changes are minimal and straightforward, focusing on a single functionality addition.\n',
|
||||||
'relevant_tests': 'No\n', 'possible_issues': 'No\n', 'security_concerns': 'No\n'}}
|
'relevant_tests': 'No\n', 'possible_issues': 'No\n', 'security_concerns': 'No\n'}}
|
||||||
|
|
||||||
expected_output = textwrap.dedent("""\
|
expected_output = textwrap.dedent("""\
|
||||||
## PR Reviewer Guide 🔍
|
## PR Reviewer Guide 🔍
|
||||||
|
|
||||||
@ -89,74 +89,74 @@ class TestConvertToMarkdown:
|
|||||||
""")
|
""")
|
||||||
|
|
||||||
assert convert_to_markdown_v2(input_data, gfm_supported=False).strip() == expected_output.strip()
|
assert convert_to_markdown_v2(input_data, gfm_supported=False).strip() == expected_output.strip()
|
||||||
|
|
||||||
def test_key_issues_to_review(self):
|
def test_key_issues_to_review(self):
|
||||||
input_data = {'review': {
|
input_data = {'review': {
|
||||||
'key_issues_to_review': [
|
'key_issues_to_review': [
|
||||||
{
|
{
|
||||||
'relevant_file' : 'src/utils.py',
|
'relevant_file': 'src/utils.py',
|
||||||
'issue_header' : 'Code Smell',
|
'issue_header': 'Code Smell',
|
||||||
'issue_content' : 'The function is too long and complex.',
|
'issue_content': 'The function is too long and complex.',
|
||||||
'start_line': 30,
|
'start_line': 30,
|
||||||
'end_line': 50,
|
'end_line': 50,
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}}
|
}}
|
||||||
mock_git_provider = Mock()
|
mock_git_provider = Mock()
|
||||||
reference_link = 'https://github.com/qodo/pr-agent/pull/1/files#diff-hashvalue-R174'
|
reference_link = 'https://github.com/qodo/pr-agent/pull/1/files#diff-hashvalue-R174'
|
||||||
mock_git_provider.get_line_link.return_value = reference_link
|
mock_git_provider.get_line_link.return_value = reference_link
|
||||||
|
|
||||||
expected_output = textwrap.dedent(f"""\
|
expected_output = textwrap.dedent(f"""\
|
||||||
## PR Reviewer Guide 🔍
|
## PR Reviewer Guide 🔍
|
||||||
|
|
||||||
Here are some key observations to aid the review process:
|
Here are some key observations to aid the review process:
|
||||||
|
|
||||||
<table>
|
<table>
|
||||||
<tr><td>⚡ <strong>Recommended focus areas for review</strong><br><br>
|
<tr><td>⚡ <strong>Recommended focus areas for review</strong><br><br>
|
||||||
|
|
||||||
<a href='{reference_link}'><strong>Code Smell</strong></a><br>The function is too long and complex.
|
<a href='{reference_link}'><strong>Code Smell</strong></a><br>The function is too long and complex.
|
||||||
|
|
||||||
</td></tr>
|
</td></tr>
|
||||||
</table>
|
</table>
|
||||||
""")
|
""")
|
||||||
|
|
||||||
assert convert_to_markdown_v2(input_data, git_provider=mock_git_provider).strip() == expected_output.strip()
|
assert convert_to_markdown_v2(input_data, git_provider=mock_git_provider).strip() == expected_output.strip()
|
||||||
mock_git_provider.get_line_link.assert_called_with('src/utils.py', 30, 50)
|
mock_git_provider.get_line_link.assert_called_with('src/utils.py', 30, 50)
|
||||||
|
|
||||||
def test_ticket_compliance(self):
|
def test_ticket_compliance(self):
|
||||||
input_data = {'review': {
|
input_data = {'review': {
|
||||||
'ticket_compliance_check': [
|
'ticket_compliance_check': [
|
||||||
{
|
{
|
||||||
'ticket_url': 'https://example.com/ticket/123',
|
'ticket_url': 'https://example.com/ticket/123',
|
||||||
'ticket_requirements': '- Requirement 1\n- Requirement 2\n',
|
'ticket_requirements': '- Requirement 1\n- Requirement 2\n',
|
||||||
'fully_compliant_requirements': '- Requirement 1\n- Requirement 2\n',
|
'fully_compliant_requirements': '- Requirement 1\n- Requirement 2\n',
|
||||||
'not_compliant_requirements': '',
|
'not_compliant_requirements': '',
|
||||||
'requires_further_human_verification': '',
|
'requires_further_human_verification': '',
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}}
|
}}
|
||||||
|
|
||||||
expected_output = textwrap.dedent("""\
|
expected_output = textwrap.dedent("""\
|
||||||
## PR Reviewer Guide 🔍
|
## PR Reviewer Guide 🔍
|
||||||
|
|
||||||
Here are some key observations to aid the review process:
|
Here are some key observations to aid the review process:
|
||||||
|
|
||||||
<table>
|
<table>
|
||||||
<tr><td>
|
<tr><td>
|
||||||
|
|
||||||
**🎫 Ticket compliance analysis ✅**
|
**🎫 Ticket compliance analysis ✅**
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
**[123](https://example.com/ticket/123) - Fully compliant**
|
**[123](https://example.com/ticket/123) - Fully compliant**
|
||||||
|
|
||||||
Compliant requirements:
|
Compliant requirements:
|
||||||
|
|
||||||
- Requirement 1
|
- Requirement 1
|
||||||
- Requirement 2
|
- Requirement 2
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
</td></tr>
|
</td></tr>
|
||||||
</table>
|
</table>
|
||||||
""")
|
""")
|
||||||
@ -179,43 +179,43 @@ class TestConvertToMarkdown:
|
|||||||
],
|
],
|
||||||
'title': 'Bug Fix',
|
'title': 'Bug Fix',
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
expected_output = textwrap.dedent("""\
|
expected_output = textwrap.dedent("""\
|
||||||
## PR Reviewer Guide 🔍
|
## PR Reviewer Guide 🔍
|
||||||
|
|
||||||
Here are some key observations to aid the review process:
|
Here are some key observations to aid the review process:
|
||||||
|
|
||||||
<table>
|
<table>
|
||||||
<tr><td>🔀 <strong>Multiple PR themes</strong><br><br>
|
<tr><td>🔀 <strong>Multiple PR themes</strong><br><br>
|
||||||
|
|
||||||
<details><summary>
|
<details><summary>
|
||||||
Sub-PR theme: <b>Refactoring</b></summary>
|
Sub-PR theme: <b>Refactoring</b></summary>
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
Relevant files:
|
Relevant files:
|
||||||
|
|
||||||
- src/file1.py
|
- src/file1.py
|
||||||
- src/file2.py
|
- src/file2.py
|
||||||
___
|
___
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
<details><summary>
|
<details><summary>
|
||||||
Sub-PR theme: <b>Bug Fix</b></summary>
|
Sub-PR theme: <b>Bug Fix</b></summary>
|
||||||
|
|
||||||
___
|
___
|
||||||
|
|
||||||
Relevant files:
|
Relevant files:
|
||||||
|
|
||||||
- src/file3.py
|
- src/file3.py
|
||||||
___
|
___
|
||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
</td></tr>
|
</td></tr>
|
||||||
</table>
|
</table>
|
||||||
""")
|
""")
|
||||||
@ -228,7 +228,6 @@ class TestConvertToMarkdown:
|
|||||||
|
|
||||||
expected_output = ''
|
expected_output = ''
|
||||||
|
|
||||||
|
|
||||||
assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()
|
assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()
|
||||||
|
|
||||||
def test_dictionary_with_empty_dictionaries(self):
|
def test_dictionary_with_empty_dictionaries(self):
|
||||||
@ -236,16 +235,16 @@ class TestConvertToMarkdown:
|
|||||||
|
|
||||||
expected_output = ''
|
expected_output = ''
|
||||||
|
|
||||||
|
|
||||||
assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()
|
assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()
|
||||||
|
|
||||||
|
|
||||||
class TestBR:
|
class TestBR:
|
||||||
def test_br1(self):
|
def test_br1(self):
|
||||||
file_change_description = '- Imported `FilePatchInfo` and `EDIT_TYPE` from `pr_agent.algo.types` instead of `pr_agent.git_providers.git_provider`.'
|
file_change_description = '- Imported `FilePatchInfo` and `EDIT_TYPE` from `pr_agent.algo.types` instead of `pr_agent.git_providers.git_provider`.'
|
||||||
file_change_description_br = insert_br_after_x_chars(file_change_description)
|
file_change_description_br = insert_br_after_x_chars(file_change_description)
|
||||||
expected_output = ('<li>Imported <code>FilePatchInfo</code> and <code>EDIT_TYPE</code> from '
|
expected_output = ('<ul><li>Imported <code>FilePatchInfo</code> and <code>EDIT_TYPE</code> from '
|
||||||
'<code>pr_agent.algo.types</code> instead <br>of '
|
'<code>pr_agent.algo.types</code> instead <br>of '
|
||||||
'<code>pr_agent.git_providers.git_provider</code>.')
|
'<code>pr_agent.git_providers.git_provider</code>.</ul>')
|
||||||
assert file_change_description_br == expected_output
|
assert file_change_description_br == expected_output
|
||||||
# print("-----")
|
# print("-----")
|
||||||
# print(file_change_description_br)
|
# print(file_change_description_br)
|
||||||
@ -255,9 +254,9 @@ class TestBR:
|
|||||||
'- Created a - new -class `ColorPaletteResourcesCollection ColorPaletteResourcesCollection '
|
'- Created a - new -class `ColorPaletteResourcesCollection ColorPaletteResourcesCollection '
|
||||||
'ColorPaletteResourcesCollection ColorPaletteResourcesCollection`')
|
'ColorPaletteResourcesCollection ColorPaletteResourcesCollection`')
|
||||||
file_change_description_br = insert_br_after_x_chars(file_change_description)
|
file_change_description_br = insert_br_after_x_chars(file_change_description)
|
||||||
expected_output = ('<li>Created a - new -class <code>ColorPaletteResourcesCollection </code><br><code>'
|
expected_output = ('<ul><li>Created a - new -class <code>ColorPaletteResourcesCollection </code><br><code>'
|
||||||
'ColorPaletteResourcesCollection ColorPaletteResourcesCollection '
|
'ColorPaletteResourcesCollection ColorPaletteResourcesCollection '
|
||||||
'</code><br><code>ColorPaletteResourcesCollection</code>')
|
'</code><br><code>ColorPaletteResourcesCollection</code></ul>')
|
||||||
assert file_change_description_br == expected_output
|
assert file_change_description_br == expected_output
|
||||||
# print("-----")
|
# print("-----")
|
||||||
# print(file_change_description_br)
|
# print(file_change_description_br)
|
||||||
|
Reference in New Issue
Block a user