diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py
index cc0db89f..fb0fe2bd 100644
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@@ -104,7 +104,8 @@ def unique_strings(input_list: List[str]) -> List[str]:
def convert_to_markdown_v2(output_data: dict,
gfm_supported: bool = True,
incremental_review=None,
- git_provider=None) -> str:
+ git_provider=None,
+ files=None) -> str:
"""
Convert a dictionary of data into markdown format.
Args:
@@ -228,9 +229,13 @@ def convert_to_markdown_v2(output_data: dict,
continue
relevant_file = issue.get('relevant_file', '').strip()
issue_header = issue.get('issue_header', '').strip()
+ if issue_header.lower() == 'possible bug':
+ issue_header = 'Possible Issue' # Make the header less frightening
issue_content = issue.get('issue_content', '').strip()
start_line = int(str(issue.get('start_line', 0)).strip())
end_line = int(str(issue.get('end_line', 0)).strip())
+
+ relevant_lines_str = extract_relevant_lines_str(end_line, files, relevant_file, start_line)
if git_provider:
reference_link = git_provider.get_line_link(relevant_file, start_line, end_line)
else:
@@ -238,7 +243,10 @@ def convert_to_markdown_v2(output_data: dict,
if gfm_supported:
if reference_link is not None and len(reference_link) > 0:
- issue_str = f"{issue_header}
{issue_content}"
+ if relevant_lines_str:
+ issue_str = f"{issue_header}\n\n{issue_content}
\n\n{relevant_lines_str}\n\n "
+ else:
+ issue_str = f"{issue_header}
{issue_content}"
else:
issue_str = f"{issue_header}
{issue_content}"
else:
@@ -280,6 +288,25 @@ def convert_to_markdown_v2(output_data: dict,
return markdown_text
+def extract_relevant_lines_str(end_line, files, relevant_file, start_line):
+ try:
+ relevant_lines_str = ""
+ if files:
+ files = set_file_languages(files)
+ for file in files:
+ if file.filename.strip() == relevant_file:
+ if not file.head_file:
+ get_logger().warning(f"No content found in file: {file.filename}")
+ return ""
+ relevant_file_lines = file.head_file.splitlines()
+ relevant_lines_str = "\n".join(relevant_file_lines[start_line - 1:end_line])
+ relevant_lines_str = f"```{file.language}\n{relevant_lines_str}\n```"
+ break
+ return relevant_lines_str
+ except Exception as e:
+ get_logger().exception(f"Failed to extract relevant lines: {e}")
+ return ""
+
def ticket_markdown_logic(emoji, markdown_text, value, gfm_supported) -> str:
ticket_compliance_str = ""
@@ -1134,3 +1161,27 @@ def get_version() -> str:
except PackageNotFoundError:
get_logger().warning("Unable to find package named 'pr-agent'")
return "unknown"
+
+
+def set_file_languages(diff_files) -> List[FilePatchInfo]:
+ try:
+ # if the language is already set, do not change it
+ if hasattr(diff_files[0], 'language') and diff_files[0].language:
+ return diff_files
+
+ # map file extensions to programming languages
+ language_extension_map_org = get_settings().language_extension_map_org
+ extension_to_language = {}
+ for language, extensions in language_extension_map_org.items():
+ for ext in extensions:
+ extension_to_language[ext] = language
+ for file in diff_files:
+ extension_s = '.' + file.filename.rsplit('.')[-1]
+ language_name = "txt"
+ if extension_s and (extension_s in extension_to_language):
+ language_name = extension_to_language[extension_s]
+ file.language = language_name.lower()
+ except Exception as e:
+ get_logger().exception(f"Failed to set file languages: {e}")
+
+ return diff_files
diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py
index cc8e158a..9ee3e2bf 100644
--- a/pr_agent/git_providers/github_provider.py
+++ b/pr_agent/git_providers/github_provider.py
@@ -19,7 +19,7 @@ from ..algo.language_handler import is_valid_file
from ..algo.types import EDIT_TYPE
from ..algo.utils import (PRReviewHeader, Range, clip_tokens,
find_line_number_of_relevant_line_in_file,
- load_large_diff)
+ load_large_diff, set_file_languages)
from ..config_loader import get_settings
from ..log import get_logger
from ..servers.utils import RateLimitExceeded
@@ -889,18 +889,7 @@ class GithubProvider(GitProvider):
RE_HUNK_HEADER = re.compile(
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
- # map file extensions to programming languages
- language_extension_map_org = get_settings().language_extension_map_org
- extension_to_language = {}
- for language, extensions in language_extension_map_org.items():
- for ext in extensions:
- extension_to_language[ext] = language
- for file in diff_files:
- extension_s = '.' + file.filename.rsplit('.')[-1]
- language_name = "txt"
- if extension_s and (extension_s in extension_to_language):
- language_name = extension_to_language[extension_s]
- file.language = language_name.lower()
+ diff_files = set_file_languages(diff_files)
for suggestion in code_suggestions_copy:
try:
diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml
index 0b61e8ea..6477ecf6 100644
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@@ -80,8 +80,8 @@ class SubPR(BaseModel):
class KeyIssuesComponentLink(BaseModel):
relevant_file: str = Field(description="The full file path of the relevant file")
- issue_header: str = Field(description="One or two word title for the the issue. For example: 'Possible Bug', 'Performance Issue', 'Code Smell', etc.")
- issue_content: str = Field(description="A short and concise summary of what should be further inspected and validated during the PR review process for this issue. Don't state line numbers here")
+ issue_header: str = Field(description="One or two word title for the the issue. For example: 'Possible Bug', etc.")
+ issue_content: str = Field(description="A short and concise summary of what should be further inspected and validated during the PR review process for this issue. Do not reference line numbers in this field.")
start_line: int = Field(description="The start line that corresponds to this issue in the relevant file")
end_line: int = Field(description="The end line that corresponds to this issue in the relevant file")
@@ -111,7 +111,7 @@ class Review(BaseModel):
{%- if question_str %}
insights_from_user_answers: str = Field(description="shortly summarize the insights you gained from the user's answers to the questions")
{%- endif %}
- key_issues_to_review: List[KeyIssuesComponentLink] = Field("A diverse list of bugs, issue or major performance concerns introduced in this PR, which the PR reviewer should further investigate")
+ key_issues_to_review: List[KeyIssuesComponentLink] = Field("A short and diverse list (0-3 issues) of high-priority bugs, problems or performance concerns introduced in the PR code, which the PR reviewer should further focus on and validate during the review process.")
{%- if require_security_review %}
security_concerns: str = Field(description="Does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' (without explaining why) if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible")
{%- endif %}
diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py
index de41a92f..162c1022 100644
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@@ -270,7 +270,9 @@ class PRReviewer:
incremental_review_markdown_text = f"Starting from commit {last_commit_url}"
markdown_text = convert_to_markdown_v2(data, self.git_provider.is_supported("gfm_markdown"),
- incremental_review_markdown_text, git_provider=self.git_provider)
+ incremental_review_markdown_text,
+ git_provider=self.git_provider,
+ files=self.git_provider.get_diff_files())
# Add help text if gfm_markdown is supported
if self.git_provider.is_supported("gfm_markdown") and get_settings().pr_reviewer.enable_help_text: