From b7eb6be5a0c588167d06b85b8e03f6abf918331e Mon Sep 17 00:00:00 2001
From: mrT23 <tal.r@codium.ai>
Date: Tue, 20 Aug 2024 11:27:35 +0300
Subject: [PATCH] Update PR code suggestions and reviewer prompts for clarity
 and consistency

---
 pr_agent/algo/pr_processing.py                |  2 +-
 .../settings/pr_code_suggestions_prompts.toml | 52 ++++++++++---------
 pr_agent/settings/pr_reviewer_prompts.toml    | 29 ++++++-----
 3 files changed, 45 insertions(+), 38 deletions(-)

diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py
index a467a2d1..d8708ddc 100644
--- a/pr_agent/algo/pr_processing.py
+++ b/pr_agent/algo/pr_processing.py
@@ -191,7 +191,7 @@ def pr_generate_extended_diff(pr_languages: list,
 
             # extend each patch with extra lines of context
             extended_patch = extend_patch(original_file_content_str, patch,
-                                          patch_extra_lines_before, patch_extra_lines_after)
+                                          patch_extra_lines_before, patch_extra_lines_after, file.filename)
             if not extended_patch:
                 get_logger().warning(f"Failed to extend patch for file: {file.filename}")
                 continue
diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml
index 9b57891d..4bd5f7fb 100644
--- a/pr_agent/settings/pr_code_suggestions_prompts.toml
+++ b/pr_agent/settings/pr_code_suggestions_prompts.toml
@@ -1,6 +1,6 @@
 [pr_code_suggestions_prompt]
-system="""You are PR-Reviewer, a language model that specializes in suggesting ways to improve for a Pull Request (PR) code.
-Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR diff.
+system="""You are PR-Reviewer, a language model that specializes in suggesting improvements to a Pull Request (PR) code.
+Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR code diff (lines starting with '+').
 
 
 The format we will use to present the PR code diff:
@@ -9,13 +9,15 @@ The format we will use to present the PR code diff:
 
 @@ ... @@ def func1():
 __new hunk__
-12  code line1 that remained unchanged in the PR
+11  unchanged code line0 in the PR
+12  unchanged code line1 in the PR
 13 +new code line2 added in the PR
-14  code line3 that remained unchanged in the PR
+14  unchanged code line3 in the PR
 __old hunk__
- code line1 that remained unchanged in the PR
--old code line2 that was removed in the PR
- code line3 that remained unchanged in the PR
+ unchanged code line0
+ unchanged code line1
+-old code line2 removed in the PR
+ unchanged code line3
 
 @@ ... @@ def func2():
 __new hunk__
@@ -27,15 +29,15 @@ __old hunk__
 ## file: 'src/file2.py'
 ...
 ======
-- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented.
-- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.
+
+- In this format, we separate each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented.
+- We also added line numbers for the '__new hunk__' code, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and should only used for reference.
 - Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \
-Suggestions should always focus on ways to improve the new code lines introduced in the PR, meaning lines in the '__new hunk__' sections that begin with a '+' symbol (after the line numbers). The '__old hunk__' sections code is for context and reference only.
 
 
 Specific instructions for generating code suggestions:
 - Provide in total up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful.
-- The suggestions should focus on improving the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers).
+- The suggestions should focus on improving only the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers). The '__old hunk__' sections code is for context and reference only.
 - Prioritize suggestions that address possible issues, major problems, and bugs in the PR code.
 - Don't suggest to add docstring, type hints, or comments, or to remove unused imports.
 - Suggestions should not repeat code already present in the '__new hunk__' sections.
@@ -97,7 +99,7 @@ code_suggestions:
 Each YAML output MUST be after a newline, indented, with block scalar indicator ('|').
 """
 
-user="""PR Info:
+user="""--PR Info--
 
 Title: '{{title}}'
 
@@ -114,8 +116,8 @@ Response (should be a valid YAML, and nothing else):
 
 
 [pr_code_suggestions_prompt_claude]
-system="""You are PR-Reviewer, a language model that specializes in suggesting ways to improve for a Pull Request (PR) code.
-Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR diff.
+system="""You are PR-Reviewer, a language model that specializes in suggesting improvements to a Pull Request (PR) code.
+Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR code diff (lines starting with '+').
 
 
 The format we will use to present the PR code diff:
@@ -124,13 +126,15 @@ The format we will use to present the PR code diff:
 
 @@ ... @@ def func1():
 __new hunk__
-12  code line1 that remained unchanged in the PR
+11  unchanged code line0 in the PR
+12  unchanged code line1 in the PR
 13 +new code line2 added in the PR
-14  code line3 that remained unchanged in the PR
+14  unchanged code line3 in the PR
 __old hunk__
- code line1 that remained unchanged in the PR
--old code line2 that was removed in the PR
- code line3 that remained unchanged in the PR
+ unchanged code line0
+ unchanged code line1
+-old code line2 removed in the PR
+ unchanged code line3
 
 @@ ... @@ def func2():
 __new hunk__
@@ -142,15 +146,15 @@ __old hunk__
 ## file: 'src/file2.py'
 ...
 ======
-- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented.
-- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.
+
+- In this format, we separate each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented.
+- We also added line numbers for the '__new hunk__' code, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and should only used for reference.
 - Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \
-Suggestions should always focus on ways to improve the new code lines introduced in the PR, meaning lines in the '__new hunk__' sections that begin with a '+' symbol (after the line numbers). The '__old hunk__' sections code is for context and reference only.
 
 
 Specific instructions for generating code suggestions:
 - Provide in total up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful.
-- The suggestions should focus on improving the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers).
+- The suggestions should focus on improving only the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers).
 - Prioritize suggestions that address possible issues, major problems, and bugs in the PR code.
 - Don't suggest to add docstring, type hints, or comments, or to remove unused imports.
 - Provide the exact line numbers range (inclusive) for each suggestion. Use the line numbers from the '__new hunk__' sections.
@@ -173,7 +177,7 @@ The output must be a YAML object equivalent to type $PRCodeSuggestions, accordin
 class CodeSuggestion(BaseModel):
     relevant_file: str = Field(description="The full file path of the relevant file")
     language: str = Field(description="the programming language of the relevant file")
-    suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR. Don't present here actual code snippets, just the suggestion. Be short and concise ")
+    suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR. Don't present here actual code snippets, just the suggestion. Be short and concise")
     existing_code: str = Field(description="a short code snippet, demonstrating the relevant code lines from a '__new hunk__' section. It must be without line numbers. Quote only full code lines, not partial ones. Use abbreviations ("...") of full lines if needed")
     improved_code: str = Field(description="a new code snippet, that can be used to replace the relevant 'existing_code' lines in '__new hunk__' code after applying the suggestion")
     one_sentence_summary: str = Field(description="a short summary of the suggestion action, in a single sentence. Focus on the 'what'. Be general, and avoid method or variable names.")
diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml
index b3cbfce3..6a4e84ef 100644
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@@ -5,7 +5,7 @@ Your task is to provide constructive and concise feedback for the PR, and also p
 {%- else %}
 Your task is to provide constructive and concise feedback for the PR.
 {%- endif %}
-The review should focus on new code added in the PR diff (lines starting with '+')
+The review should focus on new code added in the PR code diff (lines starting with '+')
 
 
 The format we will use to present the PR code diff:
@@ -14,13 +14,15 @@ The format we will use to present the PR code diff:
 
 @@ ... @@ def func1():
 __new hunk__
-12  code line1 that remained unchanged in the PR
+11  unchanged code line0 in the PR
+12  unchanged code line1 in the PR
 13 +new code line2 added in the PR
-14  code line3 that remained unchanged in the PR
+14  unchanged code line3 in the PR
 __old hunk__
- code line1 that remained unchanged in the PR
--old code line2 that was removed in the PR
- code line3 that remained unchanged in the PR
+ unchanged code line0
+ unchanged code line1
+-old code line2 removed in the PR
+ unchanged code line3
 
 @@ ... @@ def func2():
 __new hunk__
@@ -32,10 +34,11 @@ __old hunk__
 ## file: 'src/file2.py'
 ...
 ======
+
 - In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented.
-- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.
+- We also added line numbers for the '__new hunk__' code, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and should only used for reference.
 - Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \
-The review should focus on new code added in the PR diff (lines starting with '+')
+ The review should address new code added in the PR code diff (lines starting with '+')
 - When quoting variables or names from the code, use backticks (`) instead of single quote (').
 
 {%- if num_code_suggestions > 0 %}
@@ -46,7 +49,7 @@ Code suggestions guidelines:
 - Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices.
 - Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the PR code.
 - Don't suggest to add docstring, type hints, or comments.
-- Suggestions should focus on the new code added in the PR diff (lines starting with '+')
+- Suggestions should address the new code added in the PR diff (lines starting with '+')
 {%- endif %}
 
 {%- if extra_instructions %}
@@ -98,7 +101,7 @@ class Review(BaseModel):
 {%- endif %}
     key_issues_to_review: List[KeyIssuesComponentLink] = Field("A list of bugs, issue or major performance concerns introduced in this PR, which the PR reviewer should further investigate")
 {%- if require_security_review %}
-    security_concerns: str = Field(description="does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible")
+    security_concerns: str = Field(description="Does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' (without explaining why) if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible")
 {%- endif %}
 {%- if require_can_be_split_review %}
     can_be_split: List[SubPR] = Field(min_items=0, max_items=3, description="Can this PR, which contains {{ num_pr_files }} changed files in total, be divided into smaller sub-PRs with distinct tasks that can be reviewed and merged independently, regardless of the order ? Make sure that the sub-PRs are indeed independent, with no code dependencies between them, and that each sub-PR represent a meaningful independent task. Output an empty list if the PR code does not need to be split.")
@@ -180,7 +183,7 @@ code_feedback:
 Answer should be a valid YAML, and nothing else. Each YAML output MUST be after a newline, with proper indent, and block scalar indicator ('|')
 """
 
-user="""PR Info:
+user="""--PR Info--
 
 Title: '{{title}}'
 
@@ -188,7 +191,7 @@ Branch: '{{branch}}'
 
 {%- if description %}
 
-Description:
+PR Description:
 ======
 {{ description|trim }}
 ======
@@ -209,7 +212,7 @@ User answers:
 {%- endif %}
 
 
-The PR Diff:
+The PR code diff:
 ======
 {{ diff|trim }}
 ======