diff --git a/docker/Dockerfile b/docker/Dockerfile index 4e8ccd26..601e16ec 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.10 AS base +FROM python:3.12.3 AS base WORKDIR /app ADD pyproject.toml . diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py index c611aaf3..15343c97 100644 --- a/pr_agent/algo/git_patch_processing.py +++ b/pr_agent/algo/git_patch_processing.py @@ -183,7 +183,6 @@ __old hunk__ line6 ... """ - patch_with_lines_str = f"\n\n## file: '{file.filename.strip()}'\n" patch_lines = patch.splitlines() RE_HUNK_HEADER = re.compile( @@ -193,7 +192,7 @@ __old hunk__ match = None start1, size1, start2, size2 = -1, -1, -1, -1 prev_header_line = [] - header_line =[] + header_line = [] for line in patch_lines: if 'no newline at end of file' in line.lower(): continue @@ -201,17 +200,21 @@ __old hunk__ if line.startswith('@@'): header_line = line match = RE_HUNK_HEADER.match(line) - if match and new_content_lines: # found a new hunk, split the previous lines + if match and (new_content_lines or old_content_lines): # found a new hunk, split the previous lines + if prev_header_line: + patch_with_lines_str += f'\n{prev_header_line}\n' if new_content_lines: - if prev_header_line: - patch_with_lines_str += f'\n{prev_header_line}\n' - patch_with_lines_str = patch_with_lines_str.rstrip()+'\n__new hunk__\n' - for i, line_new in enumerate(new_content_lines): - patch_with_lines_str += f"{start2 + i} {line_new}\n" + is_plus_lines = any([line.startswith('+') for line in new_content_lines]) + if is_plus_lines: + patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__new hunk__\n' + for i, line_new in enumerate(new_content_lines): + patch_with_lines_str += f"{start2 + i} {line_new}\n" if old_content_lines: - patch_with_lines_str = patch_with_lines_str.rstrip()+'\n__old hunk__\n' - for line_old in old_content_lines: - patch_with_lines_str += f"{line_old}\n" + is_minus_lines = any([line.startswith('-') for line in old_content_lines]) + if is_minus_lines: + patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__old hunk__\n' + for line_old in old_content_lines: + patch_with_lines_str += f"{line_old}\n" new_content_lines = [] old_content_lines = [] if match: @@ -223,7 +226,7 @@ __old hunk__ res[i] = 0 try: start1, size1, start2, size2 = map(int, res[:4]) - except: # '@@ -0,0 +1 @@' case + except: # '@@ -0,0 +1 @@' case start1, size1, size2 = map(int, res[:3]) start2 = 0 @@ -237,15 +240,19 @@ __old hunk__ # finishing last hunk if match and new_content_lines: + patch_with_lines_str += f'\n{header_line}\n' if new_content_lines: - patch_with_lines_str += f'\n{header_line}\n' - patch_with_lines_str = patch_with_lines_str.rstrip()+ '\n__new hunk__\n' - for i, line_new in enumerate(new_content_lines): - patch_with_lines_str += f"{start2 + i} {line_new}\n" + is_plus_lines = any([line.startswith('+') for line in new_content_lines]) + if is_plus_lines: + patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__new hunk__\n' + for i, line_new in enumerate(new_content_lines): + patch_with_lines_str += f"{start2 + i} {line_new}\n" if old_content_lines: - patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__old hunk__\n' - for line_old in old_content_lines: - patch_with_lines_str += f"{line_old}\n" + is_minus_lines = any([line.startswith('-') for line in old_content_lines]) + if is_minus_lines: + patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__old hunk__\n' + for line_old in old_content_lines: + patch_with_lines_str += f"{line_old}\n" return patch_with_lines_str.rstrip() diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml index 4d68ad7b..b77146c9 100644 --- a/pr_agent/settings/pr_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_code_suggestions_prompts.toml @@ -10,11 +10,11 @@ The format we will use to present the PR code diff: @@ ... @@ def func1(): __new hunk__ 12 code line1 that remained unchanged in the PR -13 +new hunk code line2 added in the PR +13 +new code line2 added in the PR 14 code line3 that remained unchanged in the PR __old hunk__ code line1 that remained unchanged in the PR --old hunk code line2 that was removed in the PR +-old code line2 that was removed in the PR code line3 that remained unchanged in the PR @@ ... @@ def func2(): @@ -27,7 +27,7 @@ __old hunk__ ## file: 'src/file2.py' ... ====== -- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. +- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented. - We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference. - Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \ Suggestions should always focus on ways to improve the new code lines introduced in the PR, meaning lines in the '__new hunk__' sections that begin with a '+' symbol (after the line numbers). The '__old hunk__' sections code is for context and reference only. @@ -57,10 +57,10 @@ Extra instructions from the user, that should be taken into account with high pr The output must be a YAML object equivalent to type $PRCodeSuggestions, according to the following Pydantic definitions: ===== class CodeSuggestion(BaseModel): - relevant_file: str = Field(description="The full file path of the relevant file.") - language: str = Field(description="The programming language of the relevant file.") + relevant_file: str = Field(description="The full file path of the relevant file") + language: str = Field(description="The programming language of the relevant file") suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR") - existing_code: str = Field(description="a short code snippet, demonstrating the relevant code lines from a '__new hunk__' section. It must be without line numbers. Use abbreviations if needed") + existing_code: str = Field(description="a short code snippet, demonstrating the relevant code lines from a '__new hunk__' section. It must be without line numbers. Quote only full code lines, not partial ones. Use abbreviations ("...") of full lines if needed") improved_code: str = Field(description="a new code snippet, that can be used to replace the relevant 'existing_code' lines in '__new hunk__' code after applying the suggestion") one_sentence_summary: str = Field(description="a short summary of the suggestion action, in a single sentence. Focus on the 'what'. Be general, and avoid method or variable names.") relevant_lines_start: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion starts (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above") @@ -125,11 +125,11 @@ The format we will use to present the PR code diff: @@ ... @@ def func1(): __new hunk__ 12 code line1 that remained unchanged in the PR -13 +new hunk code line2 added in the PR +13 +new code line2 added in the PR 14 code line3 that remained unchanged in the PR __old hunk__ code line1 that remained unchanged in the PR --old hunk code line2 that was removed in the PR +-old code line2 that was removed in the PR code line3 that remained unchanged in the PR @@ ... @@ def func2(): @@ -142,12 +142,11 @@ __old hunk__ ## file: 'src/file2.py' ... ====== -- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. +- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented. - We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference. - Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \ Suggestions should always focus on ways to improve the new code lines introduced in the PR, meaning lines in the '__new hunk__' sections that begin with a '+' symbol (after the line numbers). The '__old hunk__' sections code is for context and reference only. - Specific instructions for generating code suggestions: - Provide in total up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful. - The suggestions should focus on improving the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers). @@ -171,10 +170,10 @@ Extra instructions from the user, that should be taken into account with high pr The output must be a YAML object equivalent to type $PRCodeSuggestions, according to the following Pydantic definitions: ===== class CodeSuggestion(BaseModel): - relevant_file: str = Field(description="The full file path of the relevant file.") - language: str = Field(description="The programming language of the relevant file.") + relevant_file: str = Field(description="The full file path of the relevant file") + language: str = Field(description="the programming language of the relevant file") suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR. Don't present here actual code snippets, just the suggestion. Be short and concise ") - existing_code: str = Field(description="a short code snippet, demonstrating the relevant code lines from a '__new hunk__' section. It must be without line numbers. Use abbreviations ("...") if needed") + existing_code: str = Field(description="a short code snippet, demonstrating the relevant code lines from a '__new hunk__' section. It must be without line numbers. Quote only full code lines, not partial ones. Use abbreviations ("...") of full lines if needed") improved_code: str = Field(description="a new code snippet, that can be used to replace the relevant 'existing_code' lines in '__new hunk__' code after applying the suggestion") one_sentence_summary: str = Field(description="a short summary of the suggestion action, in a single sentence. Focus on the 'what'. Be general, and avoid method or variable names.") relevant_lines_start: int = Field(description="The relevant line number, from a '__new hunk__' section, where the suggestion starts (inclusive). Should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above") @@ -210,4 +209,4 @@ code_suggestions: Each YAML output MUST be after a newline, indented, with block scalar indicator ('|'). -""" \ No newline at end of file +""" diff --git a/pr_agent/settings/pr_code_suggestions_reflect_prompts.toml b/pr_agent/settings/pr_code_suggestions_reflect_prompts.toml index 265116dd..9e21f32f 100644 --- a/pr_agent/settings/pr_code_suggestions_reflect_prompts.toml +++ b/pr_agent/settings/pr_code_suggestions_reflect_prompts.toml @@ -21,11 +21,11 @@ The format that is used to present the PR code diff is as follows: @@ ... @@ def func1(): __new hunk__ 12 code line1 that remained unchanged in the PR -13 +new hunk code line2 added in the PR +13 +new code line2 added in the PR 14 code line3 that remained unchanged in the PR __old hunk__ code line1 that remained unchanged in the PR --old hunk code line2 that was removed in the PR +-old code line2 that was removed in the PR code line3 that remained unchanged in the PR @@ ... @@ def func2(): @@ -39,11 +39,11 @@ __old hunk__ ... ====== - In this format, we separated each hunk of code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code that was removed. +- If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented. - We added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference. - Code lines are prefixed symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. - The output must be a YAML object equivalent to type $PRCodeSuggestionsFeedback, according to the following Pydantic definitions: ===== class CodeSuggestionFeedback(BaseModel): diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index 7d8ea24b..b3cbfce3 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -15,11 +15,11 @@ The format we will use to present the PR code diff: @@ ... @@ def func1(): __new hunk__ 12 code line1 that remained unchanged in the PR -13 +new hunk code line2 added in the PR +13 +new code line2 added in the PR 14 code line3 that remained unchanged in the PR __old hunk__ code line1 that remained unchanged in the PR --old hunk code line2 that was removed in the PR +-old code line2 that was removed in the PR code line3 that remained unchanged in the PR @@ ... @@ def func2(): @@ -32,7 +32,7 @@ __old hunk__ ## file: 'src/file2.py' ... ====== -- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. +- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. If no new code was added in a specific hunk, '__new hunk__' section will not be presented. If no code was removed, '__old hunk__' section will not be presented. - We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference. - Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \ The review should focus on new code added in the PR diff (lines starting with '+')