diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index cc0db89f..fb0fe2bd 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -104,7 +104,8 @@ def unique_strings(input_list: List[str]) -> List[str]: def convert_to_markdown_v2(output_data: dict, gfm_supported: bool = True, incremental_review=None, - git_provider=None) -> str: + git_provider=None, + files=None) -> str: """ Convert a dictionary of data into markdown format. Args: @@ -228,9 +229,13 @@ def convert_to_markdown_v2(output_data: dict, continue relevant_file = issue.get('relevant_file', '').strip() issue_header = issue.get('issue_header', '').strip() + if issue_header.lower() == 'possible bug': + issue_header = 'Possible Issue' # Make the header less frightening issue_content = issue.get('issue_content', '').strip() start_line = int(str(issue.get('start_line', 0)).strip()) end_line = int(str(issue.get('end_line', 0)).strip()) + + relevant_lines_str = extract_relevant_lines_str(end_line, files, relevant_file, start_line) if git_provider: reference_link = git_provider.get_line_link(relevant_file, start_line, end_line) else: @@ -238,7 +243,10 @@ def convert_to_markdown_v2(output_data: dict, if gfm_supported: if reference_link is not None and len(reference_link) > 0: - issue_str = f"{issue_header}
{issue_content}" + if relevant_lines_str: + issue_str = f"
{issue_header}\n\n{issue_content}\n\n{relevant_lines_str}\n\n
" + else: + issue_str = f"{issue_header}
{issue_content}" else: issue_str = f"{issue_header}
{issue_content}" else: @@ -280,6 +288,25 @@ def convert_to_markdown_v2(output_data: dict, return markdown_text +def extract_relevant_lines_str(end_line, files, relevant_file, start_line): + try: + relevant_lines_str = "" + if files: + files = set_file_languages(files) + for file in files: + if file.filename.strip() == relevant_file: + if not file.head_file: + get_logger().warning(f"No content found in file: {file.filename}") + return "" + relevant_file_lines = file.head_file.splitlines() + relevant_lines_str = "\n".join(relevant_file_lines[start_line - 1:end_line]) + relevant_lines_str = f"```{file.language}\n{relevant_lines_str}\n```" + break + return relevant_lines_str + except Exception as e: + get_logger().exception(f"Failed to extract relevant lines: {e}") + return "" + def ticket_markdown_logic(emoji, markdown_text, value, gfm_supported) -> str: ticket_compliance_str = "" @@ -1134,3 +1161,27 @@ def get_version() -> str: except PackageNotFoundError: get_logger().warning("Unable to find package named 'pr-agent'") return "unknown" + + +def set_file_languages(diff_files) -> List[FilePatchInfo]: + try: + # if the language is already set, do not change it + if hasattr(diff_files[0], 'language') and diff_files[0].language: + return diff_files + + # map file extensions to programming languages + language_extension_map_org = get_settings().language_extension_map_org + extension_to_language = {} + for language, extensions in language_extension_map_org.items(): + for ext in extensions: + extension_to_language[ext] = language + for file in diff_files: + extension_s = '.' + file.filename.rsplit('.')[-1] + language_name = "txt" + if extension_s and (extension_s in extension_to_language): + language_name = extension_to_language[extension_s] + file.language = language_name.lower() + except Exception as e: + get_logger().exception(f"Failed to set file languages: {e}") + + return diff_files diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index cc8e158a..9ee3e2bf 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -19,7 +19,7 @@ from ..algo.language_handler import is_valid_file from ..algo.types import EDIT_TYPE from ..algo.utils import (PRReviewHeader, Range, clip_tokens, find_line_number_of_relevant_line_in_file, - load_large_diff) + load_large_diff, set_file_languages) from ..config_loader import get_settings from ..log import get_logger from ..servers.utils import RateLimitExceeded @@ -889,18 +889,7 @@ class GithubProvider(GitProvider): RE_HUNK_HEADER = re.compile( r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)") - # map file extensions to programming languages - language_extension_map_org = get_settings().language_extension_map_org - extension_to_language = {} - for language, extensions in language_extension_map_org.items(): - for ext in extensions: - extension_to_language[ext] = language - for file in diff_files: - extension_s = '.' + file.filename.rsplit('.')[-1] - language_name = "txt" - if extension_s and (extension_s in extension_to_language): - language_name = extension_to_language[extension_s] - file.language = language_name.lower() + diff_files = set_file_languages(diff_files) for suggestion in code_suggestions_copy: try: diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index 0b61e8ea..6477ecf6 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -80,8 +80,8 @@ class SubPR(BaseModel): class KeyIssuesComponentLink(BaseModel): relevant_file: str = Field(description="The full file path of the relevant file") - issue_header: str = Field(description="One or two word title for the the issue. For example: 'Possible Bug', 'Performance Issue', 'Code Smell', etc.") - issue_content: str = Field(description="A short and concise summary of what should be further inspected and validated during the PR review process for this issue. Don't state line numbers here") + issue_header: str = Field(description="One or two word title for the the issue. For example: 'Possible Bug', etc.") + issue_content: str = Field(description="A short and concise summary of what should be further inspected and validated during the PR review process for this issue. Do not reference line numbers in this field.") start_line: int = Field(description="The start line that corresponds to this issue in the relevant file") end_line: int = Field(description="The end line that corresponds to this issue in the relevant file") @@ -111,7 +111,7 @@ class Review(BaseModel): {%- if question_str %} insights_from_user_answers: str = Field(description="shortly summarize the insights you gained from the user's answers to the questions") {%- endif %} - key_issues_to_review: List[KeyIssuesComponentLink] = Field("A diverse list of bugs, issue or major performance concerns introduced in this PR, which the PR reviewer should further investigate") + key_issues_to_review: List[KeyIssuesComponentLink] = Field("A short and diverse list (0-3 issues) of high-priority bugs, problems or performance concerns introduced in the PR code, which the PR reviewer should further focus on and validate during the review process.") {%- if require_security_review %} security_concerns: str = Field(description="Does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' (without explaining why) if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible") {%- endif %} diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index de41a92f..162c1022 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -270,7 +270,9 @@ class PRReviewer: incremental_review_markdown_text = f"Starting from commit {last_commit_url}" markdown_text = convert_to_markdown_v2(data, self.git_provider.is_supported("gfm_markdown"), - incremental_review_markdown_text, git_provider=self.git_provider) + incremental_review_markdown_text, + git_provider=self.git_provider, + files=self.git_provider.get_diff_files()) # Add help text if gfm_markdown is supported if self.git_provider.is_supported("gfm_markdown") and get_settings().pr_reviewer.enable_help_text: