Merge pull request #1159 from Codium-ai/tr/patches

Tr/patches
This commit is contained in:
Tal
2024-08-20 11:41:05 +03:00
committed by GitHub
5 changed files with 57 additions and 41 deletions

View File

@ -7,7 +7,8 @@ from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo
from pr_agent.log import get_logger
def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, patch_extra_lines_after=0) -> str:
def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0,
patch_extra_lines_after=0, filename: str = "") -> str:
if not patch_str or (patch_extra_lines_before == 0 and patch_extra_lines_after == 0) or not original_file_str:
return patch_str
@ -17,6 +18,13 @@ def extend_patch(original_file_str, patch_str, patch_extra_lines_before=0, patch
except UnicodeDecodeError:
return ""
# skip patches
patch_extension_skip_types = get_settings().config.patch_extension_skip_types #[".md",".txt"]
if patch_extension_skip_types and filename:
if any([filename.endswith(skip_type) for skip_type in patch_extension_skip_types]):
return patch_str
# dynamic context settings
allow_dynamic_context = get_settings().config.allow_dynamic_context
max_extra_lines_before_dynamic_context = get_settings().config.max_extra_lines_before_dynamic_context
patch_extra_lines_before_dynamic = patch_extra_lines_before

View File

@ -191,7 +191,7 @@ def pr_generate_extended_diff(pr_languages: list,
# extend each patch with extra lines of context
extended_patch = extend_patch(original_file_content_str, patch,
patch_extra_lines_before, patch_extra_lines_after)
patch_extra_lines_before, patch_extra_lines_after, file.filename)
if not extended_patch:
get_logger().warning(f"Failed to extend patch for file: {file.filename}")
continue

View File

@ -2,7 +2,7 @@
# models
model="gpt-4-turbo-2024-04-09"
model_turbo="gpt-4o-2024-08-06"
fallback_models=["gpt-4-0125-preview"]
fallback_models=["gpt-4o-2024-05-13"]
# CLI
git_provider="github"
publish_output=true
@ -19,7 +19,8 @@ max_description_tokens = 500
max_commits_tokens = 500
max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities.
custom_model_max_tokens=-1 # for models not in the default list
#
# patch extension logic
patch_extension_skip_types =[".md",".txt"]
allow_dynamic_context=false
max_extra_lines_before_dynamic_context = 10 # will try to include up to 10 extra lines before the hunk in the patch, until we reach an enclosing function or class
patch_extra_lines_before = 3 # Number of extra lines (+3 default ones) to include before each hunk in the patch

View File

@ -1,6 +1,6 @@
[pr_code_suggestions_prompt]
system="""You are PR-Reviewer, a language model that specializes in suggesting ways to improve for a Pull Request (PR) code.
Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR diff.
system="""You are PR-Reviewer, a language model that specializes in suggesting improvements to a Pull Request (PR) code.
Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR code diff (lines starting with '+').
The format we will use to present the PR code diff:
@ -9,13 +9,15 @@ The format we will use to present the PR code diff:
@@ ... @@ def func1():
__new hunk__
12 code line1 that remained unchanged in the PR
11 unchanged code line0 in the PR
12 unchanged code line1 in the PR
13 +new code line2 added in the PR
14 code line3 that remained unchanged in the PR
14 unchanged code line3 in the PR
__old hunk__
code line1 that remained unchanged in the PR
-old code line2 that was removed in the PR
code line3 that remained unchanged in the PR
unchanged code line0
unchanged code line1
-old code line2 removed in the PR
unchanged code line3
@@ ... @@ def func2():
__new hunk__
@ -27,15 +29,15 @@ __old hunk__
## file: 'src/file2.py'
...
======
- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented.
- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.
- In this format, we separate each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented.
- We also added line numbers for the '__new hunk__' code, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and should only used for reference.
- Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \
Suggestions should always focus on ways to improve the new code lines introduced in the PR, meaning lines in the '__new hunk__' sections that begin with a '+' symbol (after the line numbers). The '__old hunk__' sections code is for context and reference only.
Specific instructions for generating code suggestions:
- Provide in total up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful.
- The suggestions should focus on improving the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers).
- The suggestions should focus on improving only the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers). The '__old hunk__' sections code is for context and reference only.
- Prioritize suggestions that address possible issues, major problems, and bugs in the PR code.
- Don't suggest to add docstring, type hints, or comments, or to remove unused imports.
- Suggestions should not repeat code already present in the '__new hunk__' sections.
@ -97,7 +99,7 @@ code_suggestions:
Each YAML output MUST be after a newline, indented, with block scalar indicator ('|').
"""
user="""PR Info:
user="""--PR Info--
Title: '{{title}}'
@ -114,8 +116,8 @@ Response (should be a valid YAML, and nothing else):
[pr_code_suggestions_prompt_claude]
system="""You are PR-Reviewer, a language model that specializes in suggesting ways to improve for a Pull Request (PR) code.
Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR diff.
system="""You are PR-Reviewer, a language model that specializes in suggesting improvements to a Pull Request (PR) code.
Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR code diff (lines starting with '+').
The format we will use to present the PR code diff:
@ -124,13 +126,15 @@ The format we will use to present the PR code diff:
@@ ... @@ def func1():
__new hunk__
12 code line1 that remained unchanged in the PR
11 unchanged code line0 in the PR
12 unchanged code line1 in the PR
13 +new code line2 added in the PR
14 code line3 that remained unchanged in the PR
14 unchanged code line3 in the PR
__old hunk__
code line1 that remained unchanged in the PR
-old code line2 that was removed in the PR
code line3 that remained unchanged in the PR
unchanged code line0
unchanged code line1
-old code line2 removed in the PR
unchanged code line3
@@ ... @@ def func2():
__new hunk__
@ -142,15 +146,15 @@ __old hunk__
## file: 'src/file2.py'
...
======
- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented.
- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.
- In this format, we separate each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented.
- We also added line numbers for the '__new hunk__' code, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and should only used for reference.
- Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \
Suggestions should always focus on ways to improve the new code lines introduced in the PR, meaning lines in the '__new hunk__' sections that begin with a '+' symbol (after the line numbers). The '__old hunk__' sections code is for context and reference only.
Specific instructions for generating code suggestions:
- Provide in total up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful.
- The suggestions should focus on improving the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers).
- The suggestions should focus on improving only the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers).
- Prioritize suggestions that address possible issues, major problems, and bugs in the PR code.
- Don't suggest to add docstring, type hints, or comments, or to remove unused imports.
- Provide the exact line numbers range (inclusive) for each suggestion. Use the line numbers from the '__new hunk__' sections.
@ -173,7 +177,7 @@ The output must be a YAML object equivalent to type $PRCodeSuggestions, accordin
class CodeSuggestion(BaseModel):
relevant_file: str = Field(description="The full file path of the relevant file")
language: str = Field(description="the programming language of the relevant file")
suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR. Don't present here actual code snippets, just the suggestion. Be short and concise ")
suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR. Don't present here actual code snippets, just the suggestion. Be short and concise")
existing_code: str = Field(description="a short code snippet, demonstrating the relevant code lines from a '__new hunk__' section. It must be without line numbers. Quote only full code lines, not partial ones. Use abbreviations ("...") of full lines if needed")
improved_code: str = Field(description="a new code snippet, that can be used to replace the relevant 'existing_code' lines in '__new hunk__' code after applying the suggestion")
one_sentence_summary: str = Field(description="a short summary of the suggestion action, in a single sentence. Focus on the 'what'. Be general, and avoid method or variable names.")

View File

@ -5,7 +5,7 @@ Your task is to provide constructive and concise feedback for the PR, and also p
{%- else %}
Your task is to provide constructive and concise feedback for the PR.
{%- endif %}
The review should focus on new code added in the PR diff (lines starting with '+')
The review should focus on new code added in the PR code diff (lines starting with '+')
The format we will use to present the PR code diff:
@ -14,13 +14,15 @@ The format we will use to present the PR code diff:
@@ ... @@ def func1():
__new hunk__
12 code line1 that remained unchanged in the PR
11 unchanged code line0 in the PR
12 unchanged code line1 in the PR
13 +new code line2 added in the PR
14 code line3 that remained unchanged in the PR
14 unchanged code line3 in the PR
__old hunk__
code line1 that remained unchanged in the PR
-old code line2 that was removed in the PR
code line3 that remained unchanged in the PR
unchanged code line0
unchanged code line1
-old code line2 removed in the PR
unchanged code line3
@@ ... @@ def func2():
__new hunk__
@ -32,10 +34,11 @@ __old hunk__
## file: 'src/file2.py'
...
======
- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented.
- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.
- We also added line numbers for the '__new hunk__' code, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and should only used for reference.
- Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \
The review should focus on new code added in the PR diff (lines starting with '+')
The review should address new code added in the PR code diff (lines starting with '+')
- When quoting variables or names from the code, use backticks (`) instead of single quote (').
{%- if num_code_suggestions > 0 %}
@ -46,7 +49,7 @@ Code suggestions guidelines:
- Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices.
- Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the PR code.
- Don't suggest to add docstring, type hints, or comments.
- Suggestions should focus on the new code added in the PR diff (lines starting with '+')
- Suggestions should address the new code added in the PR diff (lines starting with '+')
{%- endif %}
{%- if extra_instructions %}
@ -98,7 +101,7 @@ class Review(BaseModel):
{%- endif %}
key_issues_to_review: List[KeyIssuesComponentLink] = Field("A list of bugs, issue or major performance concerns introduced in this PR, which the PR reviewer should further investigate")
{%- if require_security_review %}
security_concerns: str = Field(description="does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible")
security_concerns: str = Field(description="Does this PR code introduce possible vulnerabilities such as exposure of sensitive information (e.g., API keys, secrets, passwords), or security concerns like SQL injection, XSS, CSRF, and others ? Answer 'No' (without explaining why) if there are no possible issues. If there are security concerns or issues, start your answer with a short header, such as: 'Sensitive information exposure: ...', 'SQL injection: ...' etc. Explain your answer. Be specific and give examples if possible")
{%- endif %}
{%- if require_can_be_split_review %}
can_be_split: List[SubPR] = Field(min_items=0, max_items=3, description="Can this PR, which contains {{ num_pr_files }} changed files in total, be divided into smaller sub-PRs with distinct tasks that can be reviewed and merged independently, regardless of the order ? Make sure that the sub-PRs are indeed independent, with no code dependencies between them, and that each sub-PR represent a meaningful independent task. Output an empty list if the PR code does not need to be split.")
@ -180,7 +183,7 @@ code_feedback:
Answer should be a valid YAML, and nothing else. Each YAML output MUST be after a newline, with proper indent, and block scalar indicator ('|')
"""
user="""PR Info:
user="""--PR Info--
Title: '{{title}}'
@ -188,7 +191,7 @@ Branch: '{{branch}}'
{%- if description %}
Description:
PR Description:
======
{{ description|trim }}
======
@ -209,7 +212,7 @@ User answers:
{%- endif %}
The PR Diff:
The PR code diff:
======
{{ diff|trim }}
======