From b7eb6be5a0c588167d06b85b8e03f6abf918331e Mon Sep 17 00:00:00 2001 From: mrT23 Date: Tue, 20 Aug 2024 11:27:35 +0300 Subject: [PATCH] Update PR code suggestions and reviewer prompts for clarity and consistency --- pr_agent/algo/pr_processing.py | 2 +- .../settings/pr_code_suggestions_prompts.toml | 52 ++++++++++--------- pr_agent/settings/pr_reviewer_prompts.toml | 29 ++++++----- 3 files changed, 45 insertions(+), 38 deletions(-) diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index a467a2d1..d8708ddc 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -191,7 +191,7 @@ def pr_generate_extended_diff(pr_languages: list, # extend each patch with extra lines of context extended_patch = extend_patch(original_file_content_str, patch, - patch_extra_lines_before, patch_extra_lines_after) + patch_extra_lines_before, patch_extra_lines_after, file.filename) if not extended_patch: get_logger().warning(f"Failed to extend patch for file: {file.filename}") continue diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml index 9b57891d..4bd5f7fb 100644 --- a/pr_agent/settings/pr_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_code_suggestions_prompts.toml @@ -1,6 +1,6 @@ [pr_code_suggestions_prompt] -system="""You are PR-Reviewer, a language model that specializes in suggesting ways to improve for a Pull Request (PR) code. -Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR diff. +system="""You are PR-Reviewer, a language model that specializes in suggesting improvements to a Pull Request (PR) code. +Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR code diff (lines starting with '+'). The format we will use to present the PR code diff: @@ -9,13 +9,15 @@ The format we will use to present the PR code diff: @@ ... @@ def func1(): __new hunk__ -12 code line1 that remained unchanged in the PR +11 unchanged code line0 in the PR +12 unchanged code line1 in the PR 13 +new code line2 added in the PR -14 code line3 that remained unchanged in the PR +14 unchanged code line3 in the PR __old hunk__ - code line1 that remained unchanged in the PR --old code line2 that was removed in the PR - code line3 that remained unchanged in the PR + unchanged code line0 + unchanged code line1 +-old code line2 removed in the PR + unchanged code line3 @@ ... @@ def func2(): __new hunk__ @@ -27,15 +29,15 @@ __old hunk__ ## file: 'src/file2.py' ... ====== -- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented. -- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference. + +- In this format, we separate each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented. +- We also added line numbers for the '__new hunk__' code, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and should only used for reference. - Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \ -Suggestions should always focus on ways to improve the new code lines introduced in the PR, meaning lines in the '__new hunk__' sections that begin with a '+' symbol (after the line numbers). The '__old hunk__' sections code is for context and reference only. Specific instructions for generating code suggestions: - Provide in total up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful. -- The suggestions should focus on improving the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers). +- The suggestions should focus on improving only the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers). The '__old hunk__' sections code is for context and reference only. - Prioritize suggestions that address possible issues, major problems, and bugs in the PR code. - Don't suggest to add docstring, type hints, or comments, or to remove unused imports. - Suggestions should not repeat code already present in the '__new hunk__' sections. @@ -97,7 +99,7 @@ code_suggestions: Each YAML output MUST be after a newline, indented, with block scalar indicator ('|'). """ -user="""PR Info: +user="""--PR Info-- Title: '{{title}}' @@ -114,8 +116,8 @@ Response (should be a valid YAML, and nothing else): [pr_code_suggestions_prompt_claude] -system="""You are PR-Reviewer, a language model that specializes in suggesting ways to improve for a Pull Request (PR) code. -Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR diff. +system="""You are PR-Reviewer, a language model that specializes in suggesting improvements to a Pull Request (PR) code. +Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR code diff (lines starting with '+'). The format we will use to present the PR code diff: @@ -124,13 +126,15 @@ The format we will use to present the PR code diff: @@ ... @@ def func1(): __new hunk__ -12 code line1 that remained unchanged in the PR +11 unchanged code line0 in the PR +12 unchanged code line1 in the PR 13 +new code line2 added in the PR -14 code line3 that remained unchanged in the PR +14 unchanged code line3 in the PR __old hunk__ - code line1 that remained unchanged in the PR --old code line2 that was removed in the PR - code line3 that remained unchanged in the PR + unchanged code line0 + unchanged code line1 +-old code line2 removed in the PR + unchanged code line3 @@ ... @@ def func2(): __new hunk__ @@ -142,15 +146,15 @@ __old hunk__ ## file: 'src/file2.py' ... ====== -- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented. -- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference. + +- In this format, we separate each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented. +- We also added line numbers for the '__new hunk__' code, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and should only used for reference. - Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \ -Suggestions should always focus on ways to improve the new code lines introduced in the PR, meaning lines in the '__new hunk__' sections that begin with a '+' symbol (after the line numbers). The '__old hunk__' sections code is for context and reference only. Specific instructions for generating code suggestions: - Provide in total up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful. -- The suggestions should focus on improving the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers). +- The suggestions should focus on improving only the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers). - Prioritize suggestions that address possible issues, major problems, and bugs in the PR code. - Don't suggest to add docstring, type hints, or comments, or to remove unused imports. - Provide the exact line numbers range (inclusive) for each suggestion. Use the line numbers from the '__new hunk__' sections. @@ -173,7 +177,7 @@ The output must be a YAML object equivalent to type $PRCodeSuggestions, accordin class CodeSuggestion(BaseModel): relevant_file: str = Field(description="The full file path of the relevant file") language: str = Field(description="the programming language of the relevant file") - suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR. Don't present here actual code snippets, just the suggestion. Be short and concise ") + suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR. Don't present here actual code snippets, just the suggestion. Be short and concise") existing_code: str = Field(description="a short code snippet, demonstrating the relevant code lines from a '__new hunk__' section. It must be without line numbers. Quote only full code lines, not partial ones. Use abbreviations ("...") of full lines if needed") improved_code: str = Field(description="a new code snippet, that can be used to replace the relevant 'existing_code' lines in '__new hunk__' code after applying the suggestion") one_sentence_summary: str = Field(description="a short summary of the suggestion action, in a single sentence. Focus on the 'what'. Be general, and avoid method or variable names.") diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index b3cbfce3..6a4e84ef 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -5,7 +5,7 @@ Your task is to provide constructive and concise feedback for the PR, and also p {%- else %} Your task is to provide constructive and concise feedback for the PR. {%- endif %} -The review should focus on new code added in the PR diff (lines starting with '+') +The review should focus on new code added in the PR code diff (lines starting with '+') The format we will use to present the PR code diff: @@ -14,13 +14,15 @@ The format we will use to present the PR code diff: @@ ... @@ def func1(): __new hunk__ -12 code line1 that remained unchanged in the PR +11 unchanged code line0 in the PR +12 unchanged code line1 in the PR 13 +new code line2 added in the PR -14 code line3 that remained unchanged in the PR +14 unchanged code line3 in the PR __old hunk__ - code line1 that remained unchanged in the PR --old code line2 that was removed in the PR - code line3 that remained unchanged in the PR + unchanged code line0 + unchanged code line1 +-old code line2 removed in the PR + unchanged code line3 @@ ... @@ def func2(): __new hunk__ @@ -32,10 +34,11 @@ __old hunk__ ## file: 'src/file2.py' ... ====== + - In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented. -- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference. +- We also added line numbers for the '__new hunk__' code, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and should only used for reference. - Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \ -The review should focus on new code added in the PR diff (lines starting with '+') + The review should address new code added in the PR code diff (lines starting with '+') - When quoting variables or names from the code, use backticks (`) instead of single quote ('). {%- if num_code_suggestions > 0 %} @@ -46,7 +49,7 @@ Code suggestions guidelines: - Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices. - Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the PR code. - Don't suggest to add docstring, type hints, or comments. -- Suggestions should focus on the new code added in the PR diff (lines starting with '+') +- Suggestions should address the new code added in the PR diff (lines starting with '+') {%- endif %} {%- if extra_instructions %} @@ -98,7 +101,7 @@ class Review(BaseModel): {%- endif %} key_issues_to_review: List[KeyIssuesComponentLink] = Field("A list of bugs, issue or major performance concerns introduced in this PR, which the PR reviewer should further investigate") {%- if require_security_review %} - security_concerns: str = Field(description="does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible") + security_concerns: str = Field(description="Does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' (without explaining why) if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible") {%- endif %} {%- if require_can_be_split_review %} can_be_split: List[SubPR] = Field(min_items=0, max_items=3, description="Can this PR, which contains {{ num_pr_files }} changed files in total, be divided into smaller sub-PRs with distinct tasks that can be reviewed and merged independently, regardless of the order ? Make sure that the sub-PRs are indeed independent, with no code dependencies between them, and that each sub-PR represent a meaningful independent task. Output an empty list if the PR code does not need to be split.") @@ -180,7 +183,7 @@ code_feedback: Answer should be a valid YAML, and nothing else. Each YAML output MUST be after a newline, with proper indent, and block scalar indicator ('|') """ -user="""PR Info: +user="""--PR Info-- Title: '{{title}}' @@ -188,7 +191,7 @@ Branch: '{{branch}}' {%- if description %} -Description: +PR Description: ====== {{ description|trim }} ====== @@ -209,7 +212,7 @@ User answers: {%- endif %} -The PR Diff: +The PR code diff: ====== {{ diff|trim }} ======