From 7e8361b5fde6ff7ef53ed4260ad03deaafbd2654 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Thu, 19 Dec 2024 20:30:56 +0200 Subject: [PATCH 1/4] feat: enhance code review output with collapsible code snippets and variable links --- pr_agent/algo/utils.py | 84 ++++++++++++++++++---- pr_agent/git_providers/github_provider.py | 15 +--- pr_agent/settings/pr_reviewer_prompts.toml | 2 +- pr_agent/tools/pr_reviewer.py | 4 +- 4 files changed, 77 insertions(+), 28 deletions(-) diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index cc0db89f..9fab73ec 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -104,7 +104,8 @@ def unique_strings(input_list: List[str]) -> List[str]: def convert_to_markdown_v2(output_data: dict, gfm_supported: bool = True, incremental_review=None, - git_provider=None) -> str: + git_provider=None, + files=None) -> str: """ Convert a dictionary of data into markdown format. Args: @@ -180,7 +181,7 @@ def convert_to_markdown_v2(output_data: dict, if is_value_no(value): markdown_text += f'### {emoji} No relevant tests\n\n' else: - markdown_text += f"### {emoji} PR contains tests\n\n" + markdown_text += f"### PR contains tests\n\n" elif 'ticket compliance check' in key_nice.lower(): markdown_text = ticket_markdown_logic(emoji, markdown_text, value, gfm_supported) elif 'security concerns' in key_nice.lower(): @@ -231,21 +232,35 @@ def convert_to_markdown_v2(output_data: dict, issue_content = issue.get('issue_content', '').strip() start_line = int(str(issue.get('start_line', 0)).strip()) end_line = int(str(issue.get('end_line', 0)).strip()) - if git_provider: - reference_link = git_provider.get_line_link(relevant_file, start_line, end_line) - else: - reference_link = None + + relevant_lines_str = extract_relevant_lines_str(end_line, files, relevant_file, start_line) + reference_link = git_provider.get_line_link(relevant_file, start_line, end_line) if gfm_supported: - if reference_link is not None and len(reference_link) > 0: + if get_settings().pr_reviewer.extra_issue_links: + issue_content_linked = copy.deepcopy(issue_content) + referenced_variables_list = issue.get('referenced_variables', []) + for component in referenced_variables_list: + name = component['variable_name'].strip().strip('`') + + ind = issue_content.find(name) + if ind != -1: + reference_link_component = git_provider.get_line_link(relevant_file, component[ + 'relevant_line'], component['relevant_line']) + issue_content_linked = issue_content_linked[ + :ind - 1] + f"[`{name}`]({reference_link_component})" + issue_content_linked[ + ind + len( + name) + 1:] + else: + get_logger().info( + f"Failed to find variable in issue content: {component['variable_name'].strip()}") + issue_content = issue_content_linked + if relevant_lines_str: + issue_str = f"
{issue_header}\n\n{issue_content}\n\n{relevant_lines_str}\n\n
" + else: issue_str = f"{issue_header}
{issue_content}" - else: - issue_str = f"{issue_header}
{issue_content}" else: - if reference_link is not None and len(reference_link) > 0: - issue_str = f"[**{issue_header}**]({reference_link})\n\n{issue_content}\n\n" - else: - issue_str = f"**{issue_header}**\n\n{issue_content}\n\n" + issue_str = f"[**{issue_header}**]({reference_link})\n\n{issue_content}\n\n" markdown_text += f"{issue_str}\n\n" except Exception as e: get_logger().exception(f"Failed to process 'Recommended focus areas for review': {e}") @@ -280,6 +295,25 @@ def convert_to_markdown_v2(output_data: dict, return markdown_text +def extract_relevant_lines_str(end_line, files, relevant_file, start_line): + try: + relevant_lines_str = "" + if files: + files = set_file_languages(files) + for file in files: + if file.filename.strip() == relevant_file: + if not file.head_file: + get_logger().warning(f"No content found in file: {file.filename}") + return "" + relevant_file_lines = file.head_file.splitlines() + relevant_lines_str = "\n".join(relevant_file_lines[start_line - 1:end_line]) + relevant_lines_str = f"```{file.language}\n{relevant_lines_str}\n```" + break + return relevant_lines_str + except Exception as e: + get_logger().exception(f"Failed to extract relevant lines: {e}") + return "" + def ticket_markdown_logic(emoji, markdown_text, value, gfm_supported) -> str: ticket_compliance_str = "" @@ -1134,3 +1168,27 @@ def get_version() -> str: except PackageNotFoundError: get_logger().warning("Unable to find package named 'pr-agent'") return "unknown" + + +def set_file_languages(diff_files) -> List[FilePatchInfo]: + try: + # if the language is already set, do not change it + if hasattr(diff_files[0], 'language') and diff_files[0].language: + return diff_files + + # map file extensions to programming languages + language_extension_map_org = get_settings().language_extension_map_org + extension_to_language = {} + for language, extensions in language_extension_map_org.items(): + for ext in extensions: + extension_to_language[ext] = language + for file in diff_files: + extension_s = '.' + file.filename.rsplit('.')[-1] + language_name = "txt" + if extension_s and (extension_s in extension_to_language): + language_name = extension_to_language[extension_s] + file.language = language_name.lower() + except Exception as e: + get_logger().exception(f"Failed to set file languages: {e}") + + return diff_files diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index cc8e158a..9ee3e2bf 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -19,7 +19,7 @@ from ..algo.language_handler import is_valid_file from ..algo.types import EDIT_TYPE from ..algo.utils import (PRReviewHeader, Range, clip_tokens, find_line_number_of_relevant_line_in_file, - load_large_diff) + load_large_diff, set_file_languages) from ..config_loader import get_settings from ..log import get_logger from ..servers.utils import RateLimitExceeded @@ -889,18 +889,7 @@ class GithubProvider(GitProvider): RE_HUNK_HEADER = re.compile( r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)") - # map file extensions to programming languages - language_extension_map_org = get_settings().language_extension_map_org - extension_to_language = {} - for language, extensions in language_extension_map_org.items(): - for ext in extensions: - extension_to_language[ext] = language - for file in diff_files: - extension_s = '.' + file.filename.rsplit('.')[-1] - language_name = "txt" - if extension_s and (extension_s in extension_to_language): - language_name = extension_to_language[extension_s] - file.language = language_name.lower() + diff_files = set_file_languages(diff_files) for suggestion in code_suggestions_copy: try: diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index 0b61e8ea..a8c02c2c 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -111,7 +111,7 @@ class Review(BaseModel): {%- if question_str %} insights_from_user_answers: str = Field(description="shortly summarize the insights you gained from the user's answers to the questions") {%- endif %} - key_issues_to_review: List[KeyIssuesComponentLink] = Field("A diverse list of bugs, issue or major performance concerns introduced in this PR, which the PR reviewer should further investigate") + key_issues_to_review: List[KeyIssuesComponentLink] = Field("A diverse list of high-priority bugs, problems or performance concerns introduced in the PR code, which the PR reviewer should further focus on and validate during the review process.") {%- if require_security_review %} security_concerns: str = Field(description="Does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' (without explaining why) if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible") {%- endif %} diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index de41a92f..162c1022 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -270,7 +270,9 @@ class PRReviewer: incremental_review_markdown_text = f"Starting from commit {last_commit_url}" markdown_text = convert_to_markdown_v2(data, self.git_provider.is_supported("gfm_markdown"), - incremental_review_markdown_text, git_provider=self.git_provider) + incremental_review_markdown_text, + git_provider=self.git_provider, + files=self.git_provider.get_diff_files()) # Add help text if gfm_markdown is supported if self.git_provider.is_supported("gfm_markdown") and get_settings().pr_reviewer.enable_help_text: From 989670b1598f1271e2d5603d84a86f7adc4d9ef0 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Thu, 19 Dec 2024 20:49:40 +0200 Subject: [PATCH 2/4] fix: improve markdown rendering when git provider is unavailable --- pr_agent/algo/utils.py | 39 +++++++++------------- pr_agent/settings/pr_reviewer_prompts.toml | 4 +-- 2 files changed, 17 insertions(+), 26 deletions(-) diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 9fab73ec..93b6b543 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -181,7 +181,7 @@ def convert_to_markdown_v2(output_data: dict, if is_value_no(value): markdown_text += f'### {emoji} No relevant tests\n\n' else: - markdown_text += f"### PR contains tests\n\n" + markdown_text += f"### {emoji} PR contains tests\n\n" elif 'ticket compliance check' in key_nice.lower(): markdown_text = ticket_markdown_logic(emoji, markdown_text, value, gfm_supported) elif 'security concerns' in key_nice.lower(): @@ -234,33 +234,24 @@ def convert_to_markdown_v2(output_data: dict, end_line = int(str(issue.get('end_line', 0)).strip()) relevant_lines_str = extract_relevant_lines_str(end_line, files, relevant_file, start_line) - reference_link = git_provider.get_line_link(relevant_file, start_line, end_line) + if git_provider: + reference_link = git_provider.get_line_link(relevant_file, start_line, end_line) + else: + reference_link = None if gfm_supported: - if get_settings().pr_reviewer.extra_issue_links: - issue_content_linked = copy.deepcopy(issue_content) - referenced_variables_list = issue.get('referenced_variables', []) - for component in referenced_variables_list: - name = component['variable_name'].strip().strip('`') - - ind = issue_content.find(name) - if ind != -1: - reference_link_component = git_provider.get_line_link(relevant_file, component[ - 'relevant_line'], component['relevant_line']) - issue_content_linked = issue_content_linked[ - :ind - 1] + f"[`{name}`]({reference_link_component})" + issue_content_linked[ - ind + len( - name) + 1:] - else: - get_logger().info( - f"Failed to find variable in issue content: {component['variable_name'].strip()}") - issue_content = issue_content_linked - if relevant_lines_str: - issue_str = f"
{issue_header}\n\n{issue_content}\n\n{relevant_lines_str}\n\n
" + if reference_link is not None and len(reference_link) > 0: + if relevant_lines_str: + issue_str = f"
{issue_header}\n\n{issue_content}\n\n{relevant_lines_str}\n\n
" + else: + issue_str = f"{issue_header}
{issue_content}" else: - issue_str = f"{issue_header}
{issue_content}" + issue_str = f"{issue_header}
{issue_content}" else: - issue_str = f"[**{issue_header}**]({reference_link})\n\n{issue_content}\n\n" + if reference_link is not None and len(reference_link) > 0: + issue_str = f"[**{issue_header}**]({reference_link})\n\n{issue_content}\n\n" + else: + issue_str = f"**{issue_header}**\n\n{issue_content}\n\n" markdown_text += f"{issue_str}\n\n" except Exception as e: get_logger().exception(f"Failed to process 'Recommended focus areas for review': {e}") diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index a8c02c2c..1822fe14 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -81,7 +81,7 @@ class SubPR(BaseModel): class KeyIssuesComponentLink(BaseModel): relevant_file: str = Field(description="The full file path of the relevant file") issue_header: str = Field(description="One or two word title for the the issue. For example: 'Possible Bug', 'Performance Issue', 'Code Smell', etc.") - issue_content: str = Field(description="A short and concise summary of what should be further inspected and validated during the PR review process for this issue. Don't state line numbers here") + issue_content: str = Field(description="A short and concise summary of what should be further inspected and validated during the PR review process for this issue. Do not reference line numbers in this field.") start_line: int = Field(description="The start line that corresponds to this issue in the relevant file") end_line: int = Field(description="The end line that corresponds to this issue in the relevant file") @@ -111,7 +111,7 @@ class Review(BaseModel): {%- if question_str %} insights_from_user_answers: str = Field(description="shortly summarize the insights you gained from the user's answers to the questions") {%- endif %} - key_issues_to_review: List[KeyIssuesComponentLink] = Field("A diverse list of high-priority bugs, problems or performance concerns introduced in the PR code, which the PR reviewer should further focus on and validate during the review process.") + key_issues_to_review: List[KeyIssuesComponentLink] = Field("A short and diverse list (0-3 issues) of high-priority bugs, problems or performance concerns introduced in the PR code, which the PR reviewer should further focus on and validate during the review process.") {%- if require_security_review %} security_concerns: str = Field(description="Does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' (without explaining why) if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible") {%- endif %} From 3ab2cac089ce41bbea109959a9b0b989c1d83aa3 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Thu, 19 Dec 2024 20:59:17 +0200 Subject: [PATCH 3/4] fix: improve markdown rendering when git provider is unavailable --- pr_agent/algo/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 93b6b543..fb0fe2bd 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -229,6 +229,8 @@ def convert_to_markdown_v2(output_data: dict, continue relevant_file = issue.get('relevant_file', '').strip() issue_header = issue.get('issue_header', '').strip() + if issue_header.lower() == 'possible bug': + issue_header = 'Possible Issue' # Make the header less frightening issue_content = issue.get('issue_content', '').strip() start_line = int(str(issue.get('start_line', 0)).strip()) end_line = int(str(issue.get('end_line', 0)).strip()) From c2f1f2dba0cf6b8fd2fab7927032ba1d39cdcdc2 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Thu, 19 Dec 2024 21:08:27 +0200 Subject: [PATCH 4/4] fix: improve markdown rendering when git provider is unavailable --- pr_agent/settings/pr_reviewer_prompts.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index 1822fe14..6477ecf6 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -80,7 +80,7 @@ class SubPR(BaseModel): class KeyIssuesComponentLink(BaseModel): relevant_file: str = Field(description="The full file path of the relevant file") - issue_header: str = Field(description="One or two word title for the the issue. For example: 'Possible Bug', 'Performance Issue', 'Code Smell', etc.") + issue_header: str = Field(description="One or two word title for the the issue. For example: 'Possible Bug', etc.") issue_content: str = Field(description="A short and concise summary of what should be further inspected and validated during the PR review process for this issue. Do not reference line numbers in this field.") start_line: int = Field(description="The start line that corresponds to this issue in the relevant file") end_line: int = Field(description="The end line that corresponds to this issue in the relevant file")