mirror of
https://github.com/qodo-ai/pr-agent.git
synced 2025-07-15 18:20:37 +08:00
Merge pull request #630 from Codium-ai/tr/language
Enhancements in Patch Formatting and Code Suggestions Handling
This commit is contained in:
@ -181,7 +181,7 @@ __old hunk__
|
|||||||
...
|
...
|
||||||
"""
|
"""
|
||||||
|
|
||||||
patch_with_lines_str = f"\n\n## {file.filename}\n"
|
patch_with_lines_str = f"\n\n## file: '{file.filename.strip()}'\n"
|
||||||
patch_lines = patch.splitlines()
|
patch_lines = patch.splitlines()
|
||||||
RE_HUNK_HEADER = re.compile(
|
RE_HUNK_HEADER = re.compile(
|
||||||
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
|
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
|
||||||
@ -202,11 +202,11 @@ __old hunk__
|
|||||||
if new_content_lines:
|
if new_content_lines:
|
||||||
if prev_header_line:
|
if prev_header_line:
|
||||||
patch_with_lines_str += f'\n{prev_header_line}\n'
|
patch_with_lines_str += f'\n{prev_header_line}\n'
|
||||||
patch_with_lines_str += '__new hunk__\n'
|
patch_with_lines_str = patch_with_lines_str.rstrip()+'\n__new hunk__\n'
|
||||||
for i, line_new in enumerate(new_content_lines):
|
for i, line_new in enumerate(new_content_lines):
|
||||||
patch_with_lines_str += f"{start2 + i} {line_new}\n"
|
patch_with_lines_str += f"{start2 + i} {line_new}\n"
|
||||||
if old_content_lines:
|
if old_content_lines:
|
||||||
patch_with_lines_str += '__old hunk__\n'
|
patch_with_lines_str = patch_with_lines_str.rstrip()+'\n__old hunk__\n'
|
||||||
for line_old in old_content_lines:
|
for line_old in old_content_lines:
|
||||||
patch_with_lines_str += f"{line_old}\n"
|
patch_with_lines_str += f"{line_old}\n"
|
||||||
new_content_lines = []
|
new_content_lines = []
|
||||||
@ -236,11 +236,11 @@ __old hunk__
|
|||||||
if match and new_content_lines:
|
if match and new_content_lines:
|
||||||
if new_content_lines:
|
if new_content_lines:
|
||||||
patch_with_lines_str += f'\n{header_line}\n'
|
patch_with_lines_str += f'\n{header_line}\n'
|
||||||
patch_with_lines_str += '\n__new hunk__\n'
|
patch_with_lines_str = patch_with_lines_str.rstrip()+ '\n__new hunk__\n'
|
||||||
for i, line_new in enumerate(new_content_lines):
|
for i, line_new in enumerate(new_content_lines):
|
||||||
patch_with_lines_str += f"{start2 + i} {line_new}\n"
|
patch_with_lines_str += f"{start2 + i} {line_new}\n"
|
||||||
if old_content_lines:
|
if old_content_lines:
|
||||||
patch_with_lines_str += '\n__old hunk__\n'
|
patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__old hunk__\n'
|
||||||
for line_old in old_content_lines:
|
for line_old in old_content_lines:
|
||||||
patch_with_lines_str += f"{line_old}\n"
|
patch_with_lines_str += f"{line_old}\n"
|
||||||
|
|
||||||
|
@ -209,9 +209,9 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
|
|||||||
|
|
||||||
if patch:
|
if patch:
|
||||||
if not convert_hunks_to_line_numbers:
|
if not convert_hunks_to_line_numbers:
|
||||||
patch_final = f"## {file.filename}\n\n{patch}\n"
|
patch_final = f"\n\n## file: '{file.filename.strip()}'\n\n{patch.strip()}\n"
|
||||||
else:
|
else:
|
||||||
patch_final = patch
|
patch_final = "\n\n" + patch.strip()
|
||||||
patches.append(patch_final)
|
patches.append(patch_final)
|
||||||
total_tokens += token_handler.count_tokens(patch_final)
|
total_tokens += token_handler.count_tokens(patch_final)
|
||||||
if get_settings().config.verbosity_level >= 2:
|
if get_settings().config.verbosity_level >= 2:
|
||||||
@ -375,6 +375,13 @@ def get_pr_multi_diffs(git_provider: GitProvider,
|
|||||||
for lang in pr_languages:
|
for lang in pr_languages:
|
||||||
sorted_files.extend(sorted(lang['files'], key=lambda x: x.tokens, reverse=True))
|
sorted_files.extend(sorted(lang['files'], key=lambda x: x.tokens, reverse=True))
|
||||||
|
|
||||||
|
|
||||||
|
# try first a single run with standard diff string, with patch extension, and no deletions
|
||||||
|
patches_extended, total_tokens, patches_extended_tokens = pr_generate_extended_diff(
|
||||||
|
pr_languages, token_handler, add_line_numbers_to_hunks=True)
|
||||||
|
if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < get_max_tokens(model):
|
||||||
|
return ["\n".join(patches_extended)]
|
||||||
|
|
||||||
patches = []
|
patches = []
|
||||||
final_diff_list = []
|
final_diff_list = []
|
||||||
total_tokens = token_handler.prompt_tokens
|
total_tokens = token_handler.prompt_tokens
|
||||||
|
@ -5,7 +5,7 @@ Your task is to generate {{ docs_for_language }} for code components in the PR D
|
|||||||
|
|
||||||
Example for the PR Diff format:
|
Example for the PR Diff format:
|
||||||
======
|
======
|
||||||
## src/file1.py
|
## file: 'src/file1.py'
|
||||||
|
|
||||||
@@ -12,3 +12,4 @@ def func1():
|
@@ -12,3 +12,4 @@ def func1():
|
||||||
__new hunk__
|
__new hunk__
|
||||||
@ -18,7 +18,6 @@ __old hunk__
|
|||||||
-code line that was removed in the PR
|
-code line that was removed in the PR
|
||||||
code line2 that remained unchanged in the PR
|
code line2 that remained unchanged in the PR
|
||||||
|
|
||||||
|
|
||||||
@@ ... @@ def func2():
|
@@ ... @@ def func2():
|
||||||
__new hunk__
|
__new hunk__
|
||||||
...
|
...
|
||||||
@ -26,7 +25,7 @@ __old hunk__
|
|||||||
...
|
...
|
||||||
|
|
||||||
|
|
||||||
## src/file2.py
|
## file: 'src/file2.py'
|
||||||
...
|
...
|
||||||
======
|
======
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@ Your task is to provide meaningful and actionable code suggestions, to improve t
|
|||||||
|
|
||||||
Example for the PR Diff format:
|
Example for the PR Diff format:
|
||||||
======
|
======
|
||||||
## src/file1.py
|
## file: 'src/file1.py'
|
||||||
|
|
||||||
@@ ... @@ def func1():
|
@@ ... @@ def func1():
|
||||||
__new hunk__
|
__new hunk__
|
||||||
@ -16,7 +16,6 @@ __old hunk__
|
|||||||
-old code line2 that was removed in the PR
|
-old code line2 that was removed in the PR
|
||||||
code line3 that remained unchanged in the PR
|
code line3 that remained unchanged in the PR
|
||||||
|
|
||||||
|
|
||||||
@@ ... @@ def func2():
|
@@ ... @@ def func2():
|
||||||
__new hunk__
|
__new hunk__
|
||||||
...
|
...
|
||||||
@ -24,7 +23,7 @@ __old hunk__
|
|||||||
...
|
...
|
||||||
|
|
||||||
|
|
||||||
## src/file2.py
|
## file: 'src/file2.py'
|
||||||
...
|
...
|
||||||
======
|
======
|
||||||
|
|
||||||
@ -51,6 +50,7 @@ The output must be a YAML object equivalent to type $PRCodeSuggestions, accordin
|
|||||||
=====
|
=====
|
||||||
class CodeSuggestion(BaseModel):
|
class CodeSuggestion(BaseModel):
|
||||||
relevant_file: str = Field(description="the relevant file full path")
|
relevant_file: str = Field(description="the relevant file full path")
|
||||||
|
language: str = Field(description="the code language of the relevant file")
|
||||||
suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR")
|
suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR")
|
||||||
{%- if summarize_mode %}
|
{%- if summarize_mode %}
|
||||||
existing_code: str = Field(description="a short code snippet from a '__new hunk__' section to illustrate the relevant existing code. Don't show the line numbers.")
|
existing_code: str = Field(description="a short code snippet from a '__new hunk__' section to illustrate the relevant existing code. Don't show the line numbers.")
|
||||||
@ -74,6 +74,8 @@ Example output:
|
|||||||
code_suggestions:
|
code_suggestions:
|
||||||
- relevant_file: |-
|
- relevant_file: |-
|
||||||
src/file1.py
|
src/file1.py
|
||||||
|
language: |-
|
||||||
|
python
|
||||||
suggestion_content: |-
|
suggestion_content: |-
|
||||||
Add a docstring to func1()
|
Add a docstring to func1()
|
||||||
{%- if summarize_mode %}
|
{%- if summarize_mode %}
|
||||||
@ -105,11 +107,6 @@ user="""PR Info:
|
|||||||
|
|
||||||
Title: '{{title}}'
|
Title: '{{title}}'
|
||||||
|
|
||||||
{%- if language %}
|
|
||||||
|
|
||||||
Main PR language: '{{ language }}'
|
|
||||||
{%- endif %}
|
|
||||||
|
|
||||||
|
|
||||||
The PR Diff:
|
The PR Diff:
|
||||||
======
|
======
|
||||||
|
@ -39,6 +39,7 @@ class PRType(str, Enum):
|
|||||||
|
|
||||||
Class FileDescription(BaseModel):
|
Class FileDescription(BaseModel):
|
||||||
filename: str = Field(description="the relevant file full path")
|
filename: str = Field(description="the relevant file full path")
|
||||||
|
language: str = Field(description="the relevant file language")
|
||||||
changes_summary: str = Field(description="concise summary of the changes in the relevant file, in bullet points (1-4 bullet points).")
|
changes_summary: str = Field(description="concise summary of the changes in the relevant file, in bullet points (1-4 bullet points).")
|
||||||
changes_title: str = Field(description="an informative title for the changes in the files, describing its main theme (5-10 words).")
|
changes_title: str = Field(description="an informative title for the changes in the files, describing its main theme (5-10 words).")
|
||||||
label: str = Field(description="a single semantic label that represents a type of code changes that occurred in the File. Possible values (partial list): 'bug fix', 'tests', 'enhancement', 'documentation', 'error handling', 'configuration changes', 'dependencies', 'formatting', 'miscellaneous', ...")
|
label: str = Field(description="a single semantic label that represents a type of code changes that occurred in the File. Possible values (partial list): 'bug fix', 'tests', 'enhancement', 'documentation', 'error handling', 'configuration changes', 'dependencies', 'formatting', 'miscellaneous', ...")
|
||||||
@ -67,6 +68,8 @@ type:
|
|||||||
pr_files:
|
pr_files:
|
||||||
- filename: |
|
- filename: |
|
||||||
...
|
...
|
||||||
|
language: |
|
||||||
|
...
|
||||||
changes_summary: |
|
changes_summary: |
|
||||||
...
|
...
|
||||||
changes_title: |
|
changes_title: |
|
||||||
@ -104,10 +107,7 @@ Previous description:
|
|||||||
{%- endif %}
|
{%- endif %}
|
||||||
|
|
||||||
Branch: '{{branch}}'
|
Branch: '{{branch}}'
|
||||||
{%- if language %}
|
|
||||||
|
|
||||||
Main PR language: '{{ language }}'
|
|
||||||
{%- endif %}
|
|
||||||
{%- if commit_messages_str %}
|
{%- if commit_messages_str %}
|
||||||
|
|
||||||
Commit messages:
|
Commit messages:
|
||||||
|
@ -5,7 +5,7 @@ The review should focus on new code added in the PR diff (lines starting with '+
|
|||||||
|
|
||||||
Example PR Diff:
|
Example PR Diff:
|
||||||
======
|
======
|
||||||
## src/file1.py
|
## file: 'src/file1.py'
|
||||||
|
|
||||||
@@ -12,5 +12,5 @@ def func1():
|
@@ -12,5 +12,5 @@ def func1():
|
||||||
code line 1 that remained unchanged in the PR
|
code line 1 that remained unchanged in the PR
|
||||||
@ -14,12 +14,11 @@ code line 2 that remained unchanged in the PR
|
|||||||
+code line added in the PR
|
+code line added in the PR
|
||||||
code line 3 that remained unchanged in the PR
|
code line 3 that remained unchanged in the PR
|
||||||
|
|
||||||
|
|
||||||
@@ ... @@ def func2():
|
@@ ... @@ def func2():
|
||||||
...
|
...
|
||||||
|
|
||||||
|
|
||||||
## src/file2.py
|
## file: 'src/file2.py'
|
||||||
...
|
...
|
||||||
======
|
======
|
||||||
|
|
||||||
@ -115,6 +114,9 @@ PR Feedback:
|
|||||||
relevant file:
|
relevant file:
|
||||||
type: string
|
type: string
|
||||||
description: the relevant file full path
|
description: the relevant file full path
|
||||||
|
language:
|
||||||
|
type: string
|
||||||
|
description: the language of the relevant file
|
||||||
suggestion:
|
suggestion:
|
||||||
type: string
|
type: string
|
||||||
description: |-
|
description: |-
|
||||||
@ -166,6 +168,8 @@ PR Feedback:
|
|||||||
Code feedback:
|
Code feedback:
|
||||||
- relevant file: |-
|
- relevant file: |-
|
||||||
directory/xxx.py
|
directory/xxx.py
|
||||||
|
language: |-
|
||||||
|
python
|
||||||
suggestion: |-
|
suggestion: |-
|
||||||
xxx [important]
|
xxx [important]
|
||||||
relevant line: |-
|
relevant line: |-
|
||||||
@ -195,10 +199,6 @@ Description:
|
|||||||
======
|
======
|
||||||
{%- endif %}
|
{%- endif %}
|
||||||
|
|
||||||
{%- if language %}
|
|
||||||
|
|
||||||
Main PR language: '{{ language }}'
|
|
||||||
{%- endif %}
|
|
||||||
{%- if commit_messages_str %}
|
{%- if commit_messages_str %}
|
||||||
|
|
||||||
Commit messages:
|
Commit messages:
|
||||||
|
@ -226,7 +226,7 @@ class PRCodeSuggestions:
|
|||||||
for i, patches_diff in enumerate(patches_diff_list):
|
for i, patches_diff in enumerate(patches_diff_list):
|
||||||
get_logger().info(f"Processing chunk {i + 1} of {len(patches_diff_list)}")
|
get_logger().info(f"Processing chunk {i + 1} of {len(patches_diff_list)}")
|
||||||
self.patches_diff = patches_diff
|
self.patches_diff = patches_diff
|
||||||
prediction = await self._get_prediction(model)
|
prediction = await self._get_prediction(model) # toDo: parallelize
|
||||||
prediction_list.append(prediction)
|
prediction_list.append(prediction)
|
||||||
self.prediction_list = prediction_list
|
self.prediction_list = prediction_list
|
||||||
|
|
||||||
@ -253,10 +253,15 @@ class PRCodeSuggestions:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
suggestion_list = []
|
suggestion_list = []
|
||||||
|
if not data:
|
||||||
|
return suggestion_list
|
||||||
for suggestion in data:
|
for suggestion in data:
|
||||||
suggestion_list.append(suggestion)
|
suggestion_list.append(suggestion)
|
||||||
data_sorted = [[]] * len(suggestion_list)
|
data_sorted = [[]] * len(suggestion_list)
|
||||||
|
|
||||||
|
if len(suggestion_list ) == 1:
|
||||||
|
return suggestion_list
|
||||||
|
|
||||||
try:
|
try:
|
||||||
suggestion_str = ""
|
suggestion_str = ""
|
||||||
for i, suggestion in enumerate(suggestion_list):
|
for i, suggestion in enumerate(suggestion_list):
|
||||||
|
Reference in New Issue
Block a user