patch improvements

This commit is contained in:
mrT23
2024-08-03 12:44:49 +03:00
parent 1cefd23739
commit 3420e6f30d
5 changed files with 46 additions and 36 deletions

View File

@ -1,4 +1,4 @@
FROM python:3.10 AS base
FROM python:3.12 AS base
WORKDIR /app
ADD pyproject.toml .

View File

@ -183,7 +183,6 @@ __old hunk__
line6
...
"""
patch_with_lines_str = f"\n\n## file: '{file.filename.strip()}'\n"
patch_lines = patch.splitlines()
RE_HUNK_HEADER = re.compile(
@ -193,7 +192,7 @@ __old hunk__
match = None
start1, size1, start2, size2 = -1, -1, -1, -1
prev_header_line = []
header_line =[]
header_line = []
for line in patch_lines:
if 'no newline at end of file' in line.lower():
continue
@ -201,17 +200,21 @@ __old hunk__
if line.startswith('@@'):
header_line = line
match = RE_HUNK_HEADER.match(line)
if match and new_content_lines: # found a new hunk, split the previous lines
if match and (new_content_lines or old_content_lines): # found a new hunk, split the previous lines
if prev_header_line:
patch_with_lines_str += f'\n{prev_header_line}\n'
if new_content_lines:
if prev_header_line:
patch_with_lines_str += f'\n{prev_header_line}\n'
patch_with_lines_str = patch_with_lines_str.rstrip()+'\n__new hunk__\n'
for i, line_new in enumerate(new_content_lines):
patch_with_lines_str += f"{start2 + i} {line_new}\n"
is_plus_lines = any([line.startswith('+') for line in new_content_lines])
if is_plus_lines:
patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__new hunk__\n'
for i, line_new in enumerate(new_content_lines):
patch_with_lines_str += f"{start2 + i} {line_new}\n"
if old_content_lines:
patch_with_lines_str = patch_with_lines_str.rstrip()+'\n__old hunk__\n'
for line_old in old_content_lines:
patch_with_lines_str += f"{line_old}\n"
is_minus_lines = any([line.startswith('-') for line in old_content_lines])
if is_minus_lines:
patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__old hunk__\n'
for line_old in old_content_lines:
patch_with_lines_str += f"{line_old}\n"
new_content_lines = []
old_content_lines = []
if match:
@ -223,7 +226,7 @@ __old hunk__
res[i] = 0
try:
start1, size1, start2, size2 = map(int, res[:4])
except: # '@@ -0,0 +1 @@' case
except: # '@@ -0,0 +1 @@' case
start1, size1, size2 = map(int, res[:3])
start2 = 0
@ -237,15 +240,19 @@ __old hunk__
# finishing last hunk
if match and new_content_lines:
patch_with_lines_str += f'\n{header_line}\n'
if new_content_lines:
patch_with_lines_str += f'\n{header_line}\n'
patch_with_lines_str = patch_with_lines_str.rstrip()+ '\n__new hunk__\n'
for i, line_new in enumerate(new_content_lines):
patch_with_lines_str += f"{start2 + i} {line_new}\n"
is_plus_lines = any([line.startswith('+') for line in new_content_lines])
if is_plus_lines:
patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__new hunk__\n'
for i, line_new in enumerate(new_content_lines):
patch_with_lines_str += f"{start2 + i} {line_new}\n"
if old_content_lines:
patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__old hunk__\n'
for line_old in old_content_lines:
patch_with_lines_str += f"{line_old}\n"
is_minus_lines = any([line.startswith('-') for line in old_content_lines])
if is_minus_lines:
patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__old hunk__\n'
for line_old in old_content_lines:
patch_with_lines_str += f"{line_old}\n"
return patch_with_lines_str.rstrip()

View File

@ -10,11 +10,11 @@ The format we will use to present the PR code diff:
@@ ... @@ def func1():
__new hunk__
12 code line1 that remained unchanged in the PR
13 +new hunk code line2 added in the PR
13 +new code line2 added in the PR
14 code line3 that remained unchanged in the PR
__old hunk__
code line1 that remained unchanged in the PR
-old hunk code line2 that was removed in the PR
-old code line2 that was removed in the PR
code line3 that remained unchanged in the PR
@@ ... @@ def func2():
@ -31,6 +31,7 @@ __old hunk__
- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.
- Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \
Suggestions should always focus on ways to improve the new code lines introduced in the PR, meaning lines in the '__new hunk__' sections that begin with a '+' symbol (after the line numbers). The '__old hunk__' sections code is for context and reference only.
- If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented.
Specific instructions for generating code suggestions:
@ -57,10 +58,10 @@ Extra instructions from the user, that should be taken into account with high pr
The output must be a YAML object equivalent to type $PRCodeSuggestions, according to the following Pydantic definitions:
=====
class CodeSuggestion(BaseModel):
relevant_file: str = Field(description="The full file path of the relevant file.")
language: str = Field(description="The programming language of the relevant file.")
relevant_file: str = Field(description="The full file path of the relevant file")
language: str = Field(description="The programming language of the relevant file")
suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR")
existing_code: str = Field(description="a short code snippet, demonstrating the relevant code lines from a '__new hunk__' section. It must be without line numbers. Use abbreviations if needed")
existing_code: str = Field(description="a short code snippet, demonstrating the relevant code lines from a '__new hunk__' section. It must be without line numbers. Quote only full code lines, not partial ones. Use abbreviations ("...") of full lines if needed")
improved_code: str = Field(description="a new code snippet, that can be used to replace the relevant 'existing_code' lines in '__new hunk__' code after applying the suggestion")
one_sentence_summary: str = Field(description="a short summary of the suggestion action, in a single sentence. Focus on the 'what'. Be general, and avoid method or variable names.")
relevant_lines_start: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion starts (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above")
@ -125,11 +126,11 @@ The format we will use to present the PR code diff:
@@ ... @@ def func1():
__new hunk__
12 code line1 that remained unchanged in the PR
13 +new hunk code line2 added in the PR
13 +new code line2 added in the PR
14 code line3 that remained unchanged in the PR
__old hunk__
code line1 that remained unchanged in the PR
-old hunk code line2 that was removed in the PR
-old code line2 that was removed in the PR
code line3 that remained unchanged in the PR
@@ ... @@ def func2():
@ -146,6 +147,7 @@ __old hunk__
- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.
- Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \
Suggestions should always focus on ways to improve the new code lines introduced in the PR, meaning lines in the '__new hunk__' sections that begin with a '+' symbol (after the line numbers). The '__old hunk__' sections code is for context and reference only.
- If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented.
Specific instructions for generating code suggestions:
@ -171,10 +173,10 @@ Extra instructions from the user, that should be taken into account with high pr
The output must be a YAML object equivalent to type $PRCodeSuggestions, according to the following Pydantic definitions:
=====
class CodeSuggestion(BaseModel):
relevant_file: str = Field(description="The full file path of the relevant file.")
language: str = Field(description="The programming language of the relevant file.")
relevant_file: str = Field(description="The full file path of the relevant file")
language: str = Field(description="the programming language of the relevant file")
suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR. Don't present here actual code snippets, just the suggestion. Be short and concise ")
existing_code: str = Field(description="a short code snippet, demonstrating the relevant code lines from a '__new hunk__' section. It must be without line numbers. Use abbreviations ("...") if needed")
existing_code: str = Field(description="a short code snippet, demonstrating the relevant code lines from a '__new hunk__' section. It must be without line numbers. Quote only full code lines, not partial ones. Use abbreviations ("...") of full lines if needed")
improved_code: str = Field(description="a new code snippet, that can be used to replace the relevant 'existing_code' lines in '__new hunk__' code after applying the suggestion")
one_sentence_summary: str = Field(description="a short summary of the suggestion action, in a single sentence. Focus on the 'what'. Be general, and avoid method or variable names.")
relevant_lines_start: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion starts (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above")
@ -210,4 +212,4 @@ code_suggestions:
Each YAML output MUST be after a newline, indented, with block scalar indicator ('|').
"""
"""

View File

@ -21,11 +21,11 @@ The format that is used to present the PR code diff is as follows:
@@ ... @@ def func1():
__new hunk__
12 code line1 that remained unchanged in the PR
13 +new hunk code line2 added in the PR
13 +new code line2 added in the PR
14 code line3 that remained unchanged in the PR
__old hunk__
code line1 that remained unchanged in the PR
-old hunk code line2 that was removed in the PR
-old code line2 that was removed in the PR
code line3 that remained unchanged in the PR
@@ ... @@ def func2():
@ -39,11 +39,11 @@ __old hunk__
...
======
- In this format, we separated each hunk of code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code that was removed.
- If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented.
- We added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.
- Code lines are prefixed symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code.
The output must be a YAML object equivalent to type $PRCodeSuggestionsFeedback, according to the following Pydantic definitions:
=====
class CodeSuggestionFeedback(BaseModel):

View File

@ -15,11 +15,11 @@ The format we will use to present the PR code diff:
@@ ... @@ def func1():
__new hunk__
12 code line1 that remained unchanged in the PR
13 +new hunk code line2 added in the PR
13 +new code line2 added in the PR
14 code line3 that remained unchanged in the PR
__old hunk__
code line1 that remained unchanged in the PR
-old hunk code line2 that was removed in the PR
-old code line2 that was removed in the PR
code line3 that remained unchanged in the PR
@@ ... @@ def func2():
@ -36,6 +36,7 @@ __old hunk__
- We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.
- Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \
The review should focus on new code added in the PR diff (lines starting with '+')
- If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented.
- When quoting variables or names from the code, use backticks (`) instead of single quote (').
{%- if num_code_suggestions > 0 %}