feat: enhance code review output with collapsible code snippets and variable links

This commit is contained in:
mrT23
2024-12-19 20:30:56 +02:00
parent 84786495ed
commit 7e8361b5fd
4 changed files with 77 additions and 28 deletions

View File

@ -104,7 +104,8 @@ def unique_strings(input_list: List[str]) -> List[str]:
def convert_to_markdown_v2(output_data: dict,
gfm_supported: bool = True,
incremental_review=None,
git_provider=None) -> str:
git_provider=None,
files=None) -> str:
"""
Convert a dictionary of data into markdown format.
Args:
@ -180,7 +181,7 @@ def convert_to_markdown_v2(output_data: dict,
if is_value_no(value):
markdown_text += f'### {emoji} No relevant tests\n\n'
else:
markdown_text += f"### {emoji} PR contains tests\n\n"
markdown_text += f"### PR contains tests\n\n"
elif 'ticket compliance check' in key_nice.lower():
markdown_text = ticket_markdown_logic(emoji, markdown_text, value, gfm_supported)
elif 'security concerns' in key_nice.lower():
@ -231,21 +232,35 @@ def convert_to_markdown_v2(output_data: dict,
issue_content = issue.get('issue_content', '').strip()
start_line = int(str(issue.get('start_line', 0)).strip())
end_line = int(str(issue.get('end_line', 0)).strip())
if git_provider:
relevant_lines_str = extract_relevant_lines_str(end_line, files, relevant_file, start_line)
reference_link = git_provider.get_line_link(relevant_file, start_line, end_line)
else:
reference_link = None
if gfm_supported:
if reference_link is not None and len(reference_link) > 0:
if get_settings().pr_reviewer.extra_issue_links:
issue_content_linked = copy.deepcopy(issue_content)
referenced_variables_list = issue.get('referenced_variables', [])
for component in referenced_variables_list:
name = component['variable_name'].strip().strip('`')
ind = issue_content.find(name)
if ind != -1:
reference_link_component = git_provider.get_line_link(relevant_file, component[
'relevant_line'], component['relevant_line'])
issue_content_linked = issue_content_linked[
:ind - 1] + f"[`{name}`]({reference_link_component})" + issue_content_linked[
ind + len(
name) + 1:]
else:
get_logger().info(
f"Failed to find variable in issue content: {component['variable_name'].strip()}")
issue_content = issue_content_linked
if relevant_lines_str:
issue_str = f"<details><summary><a href='{reference_link}'><strong>{issue_header}</strong></a>\n\n{issue_content}</summary>\n\n{relevant_lines_str}\n\n</details>"
else:
issue_str = f"<a href='{reference_link}'><strong>{issue_header}</strong></a><br>{issue_content}"
else:
issue_str = f"<strong>{issue_header}</strong><br>{issue_content}"
else:
if reference_link is not None and len(reference_link) > 0:
issue_str = f"[**{issue_header}**]({reference_link})\n\n{issue_content}\n\n"
else:
issue_str = f"**{issue_header}**\n\n{issue_content}\n\n"
markdown_text += f"{issue_str}\n\n"
except Exception as e:
get_logger().exception(f"Failed to process 'Recommended focus areas for review': {e}")
@ -280,6 +295,25 @@ def convert_to_markdown_v2(output_data: dict,
return markdown_text
def extract_relevant_lines_str(end_line, files, relevant_file, start_line):
try:
relevant_lines_str = ""
if files:
files = set_file_languages(files)
for file in files:
if file.filename.strip() == relevant_file:
if not file.head_file:
get_logger().warning(f"No content found in file: {file.filename}")
return ""
relevant_file_lines = file.head_file.splitlines()
relevant_lines_str = "\n".join(relevant_file_lines[start_line - 1:end_line])
relevant_lines_str = f"```{file.language}\n{relevant_lines_str}\n```"
break
return relevant_lines_str
except Exception as e:
get_logger().exception(f"Failed to extract relevant lines: {e}")
return ""
def ticket_markdown_logic(emoji, markdown_text, value, gfm_supported) -> str:
ticket_compliance_str = ""
@ -1134,3 +1168,27 @@ def get_version() -> str:
except PackageNotFoundError:
get_logger().warning("Unable to find package named 'pr-agent'")
return "unknown"
def set_file_languages(diff_files) -> List[FilePatchInfo]:
try:
# if the language is already set, do not change it
if hasattr(diff_files[0], 'language') and diff_files[0].language:
return diff_files
# map file extensions to programming languages
language_extension_map_org = get_settings().language_extension_map_org
extension_to_language = {}
for language, extensions in language_extension_map_org.items():
for ext in extensions:
extension_to_language[ext] = language
for file in diff_files:
extension_s = '.' + file.filename.rsplit('.')[-1]
language_name = "txt"
if extension_s and (extension_s in extension_to_language):
language_name = extension_to_language[extension_s]
file.language = language_name.lower()
except Exception as e:
get_logger().exception(f"Failed to set file languages: {e}")
return diff_files

View File

@ -19,7 +19,7 @@ from ..algo.language_handler import is_valid_file
from ..algo.types import EDIT_TYPE
from ..algo.utils import (PRReviewHeader, Range, clip_tokens,
find_line_number_of_relevant_line_in_file,
load_large_diff)
load_large_diff, set_file_languages)
from ..config_loader import get_settings
from ..log import get_logger
from ..servers.utils import RateLimitExceeded
@ -889,18 +889,7 @@ class GithubProvider(GitProvider):
RE_HUNK_HEADER = re.compile(
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
# map file extensions to programming languages
language_extension_map_org = get_settings().language_extension_map_org
extension_to_language = {}
for language, extensions in language_extension_map_org.items():
for ext in extensions:
extension_to_language[ext] = language
for file in diff_files:
extension_s = '.' + file.filename.rsplit('.')[-1]
language_name = "txt"
if extension_s and (extension_s in extension_to_language):
language_name = extension_to_language[extension_s]
file.language = language_name.lower()
diff_files = set_file_languages(diff_files)
for suggestion in code_suggestions_copy:
try:

View File

@ -111,7 +111,7 @@ class Review(BaseModel):
{%- if question_str %}
insights_from_user_answers: str = Field(description="shortly summarize the insights you gained from the user's answers to the questions")
{%- endif %}
key_issues_to_review: List[KeyIssuesComponentLink] = Field("A diverse list of bugs, issue or major performance concerns introduced in this PR, which the PR reviewer should further investigate")
key_issues_to_review: List[KeyIssuesComponentLink] = Field("A diverse list of high-priority bugs, problems or performance concerns introduced in the PR code, which the PR reviewer should further focus on and validate during the review process.")
{%- if require_security_review %}
security_concerns: str = Field(description="Does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' (without explaining why) if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible")
{%- endif %}

View File

@ -270,7 +270,9 @@ class PRReviewer:
incremental_review_markdown_text = f"Starting from commit {last_commit_url}"
markdown_text = convert_to_markdown_v2(data, self.git_provider.is_supported("gfm_markdown"),
incremental_review_markdown_text, git_provider=self.git_provider)
incremental_review_markdown_text,
git_provider=self.git_provider,
files=self.git_provider.get_diff_files())
# Add help text if gfm_markdown is supported
if self.git_provider.is_supported("gfm_markdown") and get_settings().pr_reviewer.enable_help_text: