From 7e8361b5fde6ff7ef53ed4260ad03deaafbd2654 Mon Sep 17 00:00:00 2001
From: mrT23 <tal.r@codium.ai>
Date: Thu, 19 Dec 2024 20:30:56 +0200
Subject: [PATCH 1/4] feat: enhance code review output with collapsible code
 snippets and variable links

---
 pr_agent/algo/utils.py                     | 84 ++++++++++++++++++----
 pr_agent/git_providers/github_provider.py  | 15 +---
 pr_agent/settings/pr_reviewer_prompts.toml |  2 +-
 pr_agent/tools/pr_reviewer.py              |  4 +-
 4 files changed, 77 insertions(+), 28 deletions(-)

diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py
index cc0db89f..9fab73ec 100644
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@@ -104,7 +104,8 @@ def unique_strings(input_list: List[str]) -> List[str]:
 def convert_to_markdown_v2(output_data: dict,
                            gfm_supported: bool = True,
                            incremental_review=None,
-                           git_provider=None) -> str:
+                           git_provider=None,
+                           files=None) -> str:
     """
     Convert a dictionary of data into markdown format.
     Args:
@@ -180,7 +181,7 @@ def convert_to_markdown_v2(output_data: dict,
                 if is_value_no(value):
                     markdown_text += f'### {emoji} No relevant tests\n\n'
                 else:
-                    markdown_text += f"### {emoji} PR contains tests\n\n"
+                    markdown_text += f"### PR contains tests\n\n"
         elif 'ticket compliance check' in key_nice.lower():
             markdown_text = ticket_markdown_logic(emoji, markdown_text, value, gfm_supported)
         elif 'security concerns' in key_nice.lower():
@@ -231,21 +232,35 @@ def convert_to_markdown_v2(output_data: dict,
                         issue_content = issue.get('issue_content', '').strip()
                         start_line = int(str(issue.get('start_line', 0)).strip())
                         end_line = int(str(issue.get('end_line', 0)).strip())
-                        if git_provider:
-                            reference_link = git_provider.get_line_link(relevant_file, start_line, end_line)
-                        else:
-                            reference_link = None
+
+                        relevant_lines_str = extract_relevant_lines_str(end_line, files, relevant_file, start_line)
+                        reference_link = git_provider.get_line_link(relevant_file, start_line, end_line)
 
                         if gfm_supported:
-                            if reference_link is not None and len(reference_link) > 0:
+                            if get_settings().pr_reviewer.extra_issue_links:
+                                issue_content_linked = copy.deepcopy(issue_content)
+                                referenced_variables_list = issue.get('referenced_variables', [])
+                                for component in referenced_variables_list:
+                                    name = component['variable_name'].strip().strip('`')
+
+                                    ind = issue_content.find(name)
+                                    if ind != -1:
+                                        reference_link_component = git_provider.get_line_link(relevant_file, component[
+                                            'relevant_line'], component['relevant_line'])
+                                        issue_content_linked = issue_content_linked[
+                                                               :ind - 1] + f"[`{name}`]({reference_link_component})" + issue_content_linked[
+                                                                                                                       ind + len(
+                                                                                                                           name) + 1:]
+                                    else:
+                                        get_logger().info(
+                                            f"Failed to find variable in issue content: {component['variable_name'].strip()}")
+                                issue_content = issue_content_linked
+                            if relevant_lines_str:
+                                issue_str = f"<details><summary><a href='{reference_link}'><strong>{issue_header}</strong></a>\n\n{issue_content}</summary>\n\n{relevant_lines_str}\n\n</details>"
+                            else:
                                 issue_str = f"<a href='{reference_link}'><strong>{issue_header}</strong></a><br>{issue_content}"
-                            else:
-                                issue_str = f"<strong>{issue_header}</strong><br>{issue_content}"
                         else:
-                            if reference_link is not None and len(reference_link) > 0:
-                                issue_str = f"[**{issue_header}**]({reference_link})\n\n{issue_content}\n\n"
-                            else:
-                                issue_str = f"**{issue_header}**\n\n{issue_content}\n\n"
+                            issue_str = f"[**{issue_header}**]({reference_link})\n\n{issue_content}\n\n"
                         markdown_text += f"{issue_str}\n\n"
                     except Exception as e:
                         get_logger().exception(f"Failed to process 'Recommended focus areas for review': {e}")
@@ -280,6 +295,25 @@ def convert_to_markdown_v2(output_data: dict,
 
     return markdown_text
 
+def extract_relevant_lines_str(end_line, files, relevant_file, start_line):
+    try:
+        relevant_lines_str = ""
+        if files:
+            files = set_file_languages(files)
+            for file in files:
+                if file.filename.strip() == relevant_file:
+                    if not file.head_file:
+                        get_logger().warning(f"No content found in file: {file.filename}")
+                        return ""
+                    relevant_file_lines = file.head_file.splitlines()
+                    relevant_lines_str = "\n".join(relevant_file_lines[start_line - 1:end_line])
+                    relevant_lines_str = f"```{file.language}\n{relevant_lines_str}\n```"
+                    break
+        return relevant_lines_str
+    except Exception as e:
+        get_logger().exception(f"Failed to extract relevant lines: {e}")
+        return ""
+
 
 def ticket_markdown_logic(emoji, markdown_text, value, gfm_supported) -> str:
     ticket_compliance_str = ""
@@ -1134,3 +1168,27 @@ def get_version() -> str:
     except PackageNotFoundError:
         get_logger().warning("Unable to find package named 'pr-agent'")
         return "unknown"
+
+
+def set_file_languages(diff_files) -> List[FilePatchInfo]:
+    try:
+        # if the language is already set, do not change it
+        if hasattr(diff_files[0], 'language') and diff_files[0].language:
+            return diff_files
+
+        # map file extensions to programming languages
+        language_extension_map_org = get_settings().language_extension_map_org
+        extension_to_language = {}
+        for language, extensions in language_extension_map_org.items():
+            for ext in extensions:
+                extension_to_language[ext] = language
+        for file in diff_files:
+            extension_s = '.' + file.filename.rsplit('.')[-1]
+            language_name = "txt"
+            if extension_s and (extension_s in extension_to_language):
+                language_name = extension_to_language[extension_s]
+            file.language = language_name.lower()
+    except Exception as e:
+        get_logger().exception(f"Failed to set file languages: {e}")
+
+    return diff_files
diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py
index cc8e158a..9ee3e2bf 100644
--- a/pr_agent/git_providers/github_provider.py
+++ b/pr_agent/git_providers/github_provider.py
@@ -19,7 +19,7 @@ from ..algo.language_handler import is_valid_file
 from ..algo.types import EDIT_TYPE
 from ..algo.utils import (PRReviewHeader, Range, clip_tokens,
                           find_line_number_of_relevant_line_in_file,
-                          load_large_diff)
+                          load_large_diff, set_file_languages)
 from ..config_loader import get_settings
 from ..log import get_logger
 from ..servers.utils import RateLimitExceeded
@@ -889,18 +889,7 @@ class GithubProvider(GitProvider):
         RE_HUNK_HEADER = re.compile(
             r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
 
-        # map file extensions to programming languages
-        language_extension_map_org = get_settings().language_extension_map_org
-        extension_to_language = {}
-        for language, extensions in language_extension_map_org.items():
-            for ext in extensions:
-                extension_to_language[ext] = language
-        for file in diff_files:
-            extension_s = '.' + file.filename.rsplit('.')[-1]
-            language_name = "txt"
-            if extension_s and (extension_s in extension_to_language):
-                language_name = extension_to_language[extension_s]
-            file.language = language_name.lower()
+        diff_files = set_file_languages(diff_files)
 
         for suggestion in code_suggestions_copy:
             try:
diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml
index 0b61e8ea..a8c02c2c 100644
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@@ -111,7 +111,7 @@ class Review(BaseModel):
 {%- if question_str %}
     insights_from_user_answers: str = Field(description="shortly summarize the insights you gained from the user's answers to the questions")
 {%- endif %}
-    key_issues_to_review: List[KeyIssuesComponentLink] = Field("A diverse list of bugs, issue or major performance concerns introduced in this PR, which the PR reviewer should further investigate")
+    key_issues_to_review: List[KeyIssuesComponentLink] = Field("A diverse list of high-priority bugs, problems or performance concerns introduced in the PR code, which the PR reviewer should further focus on and validate during the review process.")
 {%- if require_security_review %}
     security_concerns: str = Field(description="Does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' (without explaining why) if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible")
 {%- endif %}
diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py
index de41a92f..162c1022 100644
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@@ -270,7 +270,9 @@ class PRReviewer:
             incremental_review_markdown_text = f"Starting from commit {last_commit_url}"
 
         markdown_text = convert_to_markdown_v2(data, self.git_provider.is_supported("gfm_markdown"),
-                                            incremental_review_markdown_text, git_provider=self.git_provider)
+                                            incremental_review_markdown_text,
+                                               git_provider=self.git_provider,
+                                               files=self.git_provider.get_diff_files())
 
         # Add help text if gfm_markdown is supported
         if self.git_provider.is_supported("gfm_markdown") and get_settings().pr_reviewer.enable_help_text:

From 989670b1598f1271e2d5603d84a86f7adc4d9ef0 Mon Sep 17 00:00:00 2001
From: mrT23 <tal.r@codium.ai>
Date: Thu, 19 Dec 2024 20:49:40 +0200
Subject: [PATCH 2/4] fix: improve markdown rendering when git provider is
 unavailable

---
 pr_agent/algo/utils.py                     | 39 +++++++++-------------
 pr_agent/settings/pr_reviewer_prompts.toml |  4 +--
 2 files changed, 17 insertions(+), 26 deletions(-)

diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py
index 9fab73ec..93b6b543 100644
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@@ -181,7 +181,7 @@ def convert_to_markdown_v2(output_data: dict,
                 if is_value_no(value):
                     markdown_text += f'### {emoji} No relevant tests\n\n'
                 else:
-                    markdown_text += f"### PR contains tests\n\n"
+                    markdown_text += f"### {emoji} PR contains tests\n\n"
         elif 'ticket compliance check' in key_nice.lower():
             markdown_text = ticket_markdown_logic(emoji, markdown_text, value, gfm_supported)
         elif 'security concerns' in key_nice.lower():
@@ -234,33 +234,24 @@ def convert_to_markdown_v2(output_data: dict,
                         end_line = int(str(issue.get('end_line', 0)).strip())
 
                         relevant_lines_str = extract_relevant_lines_str(end_line, files, relevant_file, start_line)
-                        reference_link = git_provider.get_line_link(relevant_file, start_line, end_line)
+                        if git_provider:
+                            reference_link = git_provider.get_line_link(relevant_file, start_line, end_line)
+                        else:
+                            reference_link = None
 
                         if gfm_supported:
-                            if get_settings().pr_reviewer.extra_issue_links:
-                                issue_content_linked = copy.deepcopy(issue_content)
-                                referenced_variables_list = issue.get('referenced_variables', [])
-                                for component in referenced_variables_list:
-                                    name = component['variable_name'].strip().strip('`')
-
-                                    ind = issue_content.find(name)
-                                    if ind != -1:
-                                        reference_link_component = git_provider.get_line_link(relevant_file, component[
-                                            'relevant_line'], component['relevant_line'])
-                                        issue_content_linked = issue_content_linked[
-                                                               :ind - 1] + f"[`{name}`]({reference_link_component})" + issue_content_linked[
-                                                                                                                       ind + len(
-                                                                                                                           name) + 1:]
-                                    else:
-                                        get_logger().info(
-                                            f"Failed to find variable in issue content: {component['variable_name'].strip()}")
-                                issue_content = issue_content_linked
-                            if relevant_lines_str:
-                                issue_str = f"<details><summary><a href='{reference_link}'><strong>{issue_header}</strong></a>\n\n{issue_content}</summary>\n\n{relevant_lines_str}\n\n</details>"
+                            if reference_link is not None and len(reference_link) > 0:
+                                if relevant_lines_str:
+                                    issue_str = f"<details><summary><a href='{reference_link}'><strong>{issue_header}</strong></a>\n\n{issue_content}</summary>\n\n{relevant_lines_str}\n\n</details>"
+                                else:
+                                    issue_str = f"<a href='{reference_link}'><strong>{issue_header}</strong></a><br>{issue_content}"
                             else:
-                                issue_str = f"<a href='{reference_link}'><strong>{issue_header}</strong></a><br>{issue_content}"
+                                issue_str = f"<strong>{issue_header}</strong><br>{issue_content}"
                         else:
-                            issue_str = f"[**{issue_header}**]({reference_link})\n\n{issue_content}\n\n"
+                            if reference_link is not None and len(reference_link) > 0:
+                                issue_str = f"[**{issue_header}**]({reference_link})\n\n{issue_content}\n\n"
+                            else:
+                                issue_str = f"**{issue_header}**\n\n{issue_content}\n\n"
                         markdown_text += f"{issue_str}\n\n"
                     except Exception as e:
                         get_logger().exception(f"Failed to process 'Recommended focus areas for review': {e}")
diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml
index a8c02c2c..1822fe14 100644
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@@ -81,7 +81,7 @@ class SubPR(BaseModel):
 class KeyIssuesComponentLink(BaseModel):
     relevant_file: str = Field(description="The full file path of the relevant file")
     issue_header: str = Field(description="One or two word title for the the issue. For example: 'Possible Bug', 'Performance Issue', 'Code Smell', etc.")
-    issue_content: str = Field(description="A short and concise summary of what should be further inspected and validated during the PR review process for this issue. Don't state line numbers here")
+    issue_content: str = Field(description="A short and concise summary of what should be further inspected and validated during the PR review process for this issue. Do not reference line numbers in this field.")
     start_line: int = Field(description="The start line that corresponds to this issue in the relevant file")
     end_line: int = Field(description="The end line that corresponds to this issue in the relevant file")
 
@@ -111,7 +111,7 @@ class Review(BaseModel):
 {%- if question_str %}
     insights_from_user_answers: str = Field(description="shortly summarize the insights you gained from the user's answers to the questions")
 {%- endif %}
-    key_issues_to_review: List[KeyIssuesComponentLink] = Field("A diverse list of high-priority bugs, problems or performance concerns introduced in the PR code, which the PR reviewer should further focus on and validate during the review process.")
+    key_issues_to_review: List[KeyIssuesComponentLink] = Field("A short and diverse list (0-3 issues) of high-priority bugs, problems or performance concerns introduced in the PR code, which the PR reviewer should further focus on and validate during the review process.")
 {%- if require_security_review %}
     security_concerns: str = Field(description="Does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' (without explaining why) if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible")
 {%- endif %}

From 3ab2cac089ce41bbea109959a9b0b989c1d83aa3 Mon Sep 17 00:00:00 2001
From: mrT23 <tal.r@codium.ai>
Date: Thu, 19 Dec 2024 20:59:17 +0200
Subject: [PATCH 3/4] fix: improve markdown rendering when git provider is
 unavailable

---
 pr_agent/algo/utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py
index 93b6b543..fb0fe2bd 100644
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@@ -229,6 +229,8 @@ def convert_to_markdown_v2(output_data: dict,
                             continue
                         relevant_file = issue.get('relevant_file', '').strip()
                         issue_header = issue.get('issue_header', '').strip()
+                        if issue_header.lower() == 'possible bug':
+                            issue_header = 'Possible Issue'  # Make the header less frightening
                         issue_content = issue.get('issue_content', '').strip()
                         start_line = int(str(issue.get('start_line', 0)).strip())
                         end_line = int(str(issue.get('end_line', 0)).strip())

From c2f1f2dba0cf6b8fd2fab7927032ba1d39cdcdc2 Mon Sep 17 00:00:00 2001
From: mrT23 <tal.r@codium.ai>
Date: Thu, 19 Dec 2024 21:08:27 +0200
Subject: [PATCH 4/4] fix: improve markdown rendering when git provider is
 unavailable

---
 pr_agent/settings/pr_reviewer_prompts.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml
index 1822fe14..6477ecf6 100644
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@@ -80,7 +80,7 @@ class SubPR(BaseModel):
 
 class KeyIssuesComponentLink(BaseModel):
     relevant_file: str = Field(description="The full file path of the relevant file")
-    issue_header: str = Field(description="One or two word title for the the issue. For example: 'Possible Bug', 'Performance Issue', 'Code Smell', etc.")
+    issue_header: str = Field(description="One or two word title for the the issue. For example: 'Possible Bug', etc.")
     issue_content: str = Field(description="A short and concise summary of what should be further inspected and validated during the PR review process for this issue. Do not reference line numbers in this field.")
     start_line: int = Field(description="The start line that corresponds to this issue in the relevant file")
     end_line: int = Field(description="The end line that corresponds to this issue in the relevant file")