mirror of
https://github.com/qodo-ai/pr-agent.git
synced 2025-07-15 02:00:39 +08:00
Merge pull request #630 from Codium-ai/tr/language
Enhancements in Patch Formatting and Code Suggestions Handling
This commit is contained in:
@ -181,7 +181,7 @@ __old hunk__
|
||||
...
|
||||
"""
|
||||
|
||||
patch_with_lines_str = f"\n\n## {file.filename}\n"
|
||||
patch_with_lines_str = f"\n\n## file: '{file.filename.strip()}'\n"
|
||||
patch_lines = patch.splitlines()
|
||||
RE_HUNK_HEADER = re.compile(
|
||||
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
|
||||
@ -202,11 +202,11 @@ __old hunk__
|
||||
if new_content_lines:
|
||||
if prev_header_line:
|
||||
patch_with_lines_str += f'\n{prev_header_line}\n'
|
||||
patch_with_lines_str += '__new hunk__\n'
|
||||
patch_with_lines_str = patch_with_lines_str.rstrip()+'\n__new hunk__\n'
|
||||
for i, line_new in enumerate(new_content_lines):
|
||||
patch_with_lines_str += f"{start2 + i} {line_new}\n"
|
||||
if old_content_lines:
|
||||
patch_with_lines_str += '__old hunk__\n'
|
||||
patch_with_lines_str = patch_with_lines_str.rstrip()+'\n__old hunk__\n'
|
||||
for line_old in old_content_lines:
|
||||
patch_with_lines_str += f"{line_old}\n"
|
||||
new_content_lines = []
|
||||
@ -236,11 +236,11 @@ __old hunk__
|
||||
if match and new_content_lines:
|
||||
if new_content_lines:
|
||||
patch_with_lines_str += f'\n{header_line}\n'
|
||||
patch_with_lines_str += '\n__new hunk__\n'
|
||||
patch_with_lines_str = patch_with_lines_str.rstrip()+ '\n__new hunk__\n'
|
||||
for i, line_new in enumerate(new_content_lines):
|
||||
patch_with_lines_str += f"{start2 + i} {line_new}\n"
|
||||
if old_content_lines:
|
||||
patch_with_lines_str += '\n__old hunk__\n'
|
||||
patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__old hunk__\n'
|
||||
for line_old in old_content_lines:
|
||||
patch_with_lines_str += f"{line_old}\n"
|
||||
|
||||
|
@ -209,9 +209,9 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo
|
||||
|
||||
if patch:
|
||||
if not convert_hunks_to_line_numbers:
|
||||
patch_final = f"## {file.filename}\n\n{patch}\n"
|
||||
patch_final = f"\n\n## file: '{file.filename.strip()}\n\n{patch.strip()}\n'"
|
||||
else:
|
||||
patch_final = patch
|
||||
patch_final = "\n\n" + patch.strip()
|
||||
patches.append(patch_final)
|
||||
total_tokens += token_handler.count_tokens(patch_final)
|
||||
if get_settings().config.verbosity_level >= 2:
|
||||
@ -375,6 +375,13 @@ def get_pr_multi_diffs(git_provider: GitProvider,
|
||||
for lang in pr_languages:
|
||||
sorted_files.extend(sorted(lang['files'], key=lambda x: x.tokens, reverse=True))
|
||||
|
||||
|
||||
# try first a single run with standard diff string, with patch extension, and no deletions
|
||||
patches_extended, total_tokens, patches_extended_tokens = pr_generate_extended_diff(
|
||||
pr_languages, token_handler, add_line_numbers_to_hunks=True)
|
||||
if total_tokens + OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD < get_max_tokens(model):
|
||||
return ["\n".join(patches_extended)]
|
||||
|
||||
patches = []
|
||||
final_diff_list = []
|
||||
total_tokens = token_handler.prompt_tokens
|
||||
|
@ -5,7 +5,7 @@ Your task is to generate {{ docs_for_language }} for code components in the PR D
|
||||
|
||||
Example for the PR Diff format:
|
||||
======
|
||||
## src/file1.py
|
||||
## file: 'src/file1.py'
|
||||
|
||||
@@ -12,3 +12,4 @@ def func1():
|
||||
__new hunk__
|
||||
@ -18,7 +18,6 @@ __old hunk__
|
||||
-code line that was removed in the PR
|
||||
code line2 that remained unchanged in the PR
|
||||
|
||||
|
||||
@@ ... @@ def func2():
|
||||
__new hunk__
|
||||
...
|
||||
@ -26,7 +25,7 @@ __old hunk__
|
||||
...
|
||||
|
||||
|
||||
## src/file2.py
|
||||
## file: 'src/file2.py'
|
||||
...
|
||||
======
|
||||
|
||||
|
@ -4,7 +4,7 @@ Your task is to provide meaningful and actionable code suggestions, to improve t
|
||||
|
||||
Example for the PR Diff format:
|
||||
======
|
||||
## src/file1.py
|
||||
## file: 'src/file1.py'
|
||||
|
||||
@@ ... @@ def func1():
|
||||
__new hunk__
|
||||
@ -16,7 +16,6 @@ __old hunk__
|
||||
-old code line2 that was removed in the PR
|
||||
code line3 that remained unchanged in the PR
|
||||
|
||||
|
||||
@@ ... @@ def func2():
|
||||
__new hunk__
|
||||
...
|
||||
@ -24,7 +23,7 @@ __old hunk__
|
||||
...
|
||||
|
||||
|
||||
## src/file2.py
|
||||
## file: 'src/file2.py'
|
||||
...
|
||||
======
|
||||
|
||||
@ -51,6 +50,7 @@ The output must be a YAML object equivalent to type $PRCodeSuggestions, accordin
|
||||
=====
|
||||
class CodeSuggestion(BaseModel):
|
||||
relevant_file: str = Field(description="the relevant file full path")
|
||||
language: str = Field(description="the code language of the relevant file")
|
||||
suggestion_content: str = Field(description="an actionable suggestion for meaningfully improving the new code introduced in the PR")
|
||||
{%- if summarize_mode %}
|
||||
existing_code: str = Field(description="a short code snippet from a '__new hunk__' section to illustrate the relevant existing code. Don't show the line numbers.")
|
||||
@ -74,6 +74,8 @@ Example output:
|
||||
code_suggestions:
|
||||
- relevant_file: |-
|
||||
src/file1.py
|
||||
language: |-
|
||||
python
|
||||
suggestion_content: |-
|
||||
Add a docstring to func1()
|
||||
{%- if summarize_mode %}
|
||||
@ -105,11 +107,6 @@ user="""PR Info:
|
||||
|
||||
Title: '{{title}}'
|
||||
|
||||
{%- if language %}
|
||||
|
||||
Main PR language: '{{ language }}'
|
||||
{%- endif %}
|
||||
|
||||
|
||||
The PR Diff:
|
||||
======
|
||||
|
@ -39,6 +39,7 @@ class PRType(str, Enum):
|
||||
|
||||
class FileDescription(BaseModel):
|
||||
filename: str = Field(description="the relevant file full path")
|
||||
language: str = Field(description="the relevant file language")
|
||||
changes_summary: str = Field(description="concise summary of the changes in the relevant file, in bullet points (1-4 bullet points).")
|
||||
changes_title: str = Field(description="an informative title for the changes in the files, describing its main theme (5-10 words).")
|
||||
label: str = Field(description="a single semantic label that represents a type of code changes that occurred in the File. Possible values (partial list): 'bug fix', 'tests', 'enhancement', 'documentation', 'error handling', 'configuration changes', 'dependencies', 'formatting', 'miscellaneous', ...")
|
||||
@ -67,6 +68,8 @@ type:
|
||||
pr_files:
|
||||
- filename: |
|
||||
...
|
||||
language: |
|
||||
...
|
||||
changes_summary: |
|
||||
...
|
||||
changes_title: |
|
||||
@ -104,10 +107,7 @@ Previous description:
|
||||
{%- endif %}
|
||||
|
||||
Branch: '{{branch}}'
|
||||
{%- if language %}
|
||||
|
||||
Main PR language: '{{ language }}'
|
||||
{%- endif %}
|
||||
{%- if commit_messages_str %}
|
||||
|
||||
Commit messages:
|
||||
|
@ -5,7 +5,7 @@ The review should focus on new code added in the PR diff (lines starting with '+
|
||||
|
||||
Example PR Diff:
|
||||
======
|
||||
## src/file1.py
|
||||
## file: 'src/file1.py'
|
||||
|
||||
@@ -12,5 +12,5 @@ def func1():
|
||||
code line 1 that remained unchanged in the PR
|
||||
@ -14,12 +14,11 @@ code line 2 that remained unchanged in the PR
|
||||
+code line added in the PR
|
||||
code line 3 that remained unchanged in the PR
|
||||
|
||||
|
||||
@@ ... @@ def func2():
|
||||
...
|
||||
|
||||
|
||||
## src/file2.py
|
||||
## file: 'src/file2.py'
|
||||
...
|
||||
======
|
||||
|
||||
@ -115,6 +114,9 @@ PR Feedback:
|
||||
relevant file:
|
||||
type: string
|
||||
description: the relevant file full path
|
||||
language:
|
||||
type: string
|
||||
description: the language of the relevant file
|
||||
suggestion:
|
||||
type: string
|
||||
description: |-
|
||||
@ -166,6 +168,8 @@ PR Feedback:
|
||||
Code feedback:
|
||||
- relevant file: |-
|
||||
directory/xxx.py
|
||||
language: |-
|
||||
python
|
||||
suggestion: |-
|
||||
xxx [important]
|
||||
relevant line: |-
|
||||
@ -195,10 +199,6 @@ Description:
|
||||
======
|
||||
{%- endif %}
|
||||
|
||||
{%- if language %}
|
||||
|
||||
Main PR language: '{{ language }}'
|
||||
{%- endif %}
|
||||
{%- if commit_messages_str %}
|
||||
|
||||
Commit messages:
|
||||
|
@ -226,7 +226,7 @@ class PRCodeSuggestions:
|
||||
for i, patches_diff in enumerate(patches_diff_list):
|
||||
get_logger().info(f"Processing chunk {i + 1} of {len(patches_diff_list)}")
|
||||
self.patches_diff = patches_diff
|
||||
prediction = await self._get_prediction(model)
|
||||
prediction = await self._get_prediction(model) # toDo: parallelize
|
||||
prediction_list.append(prediction)
|
||||
self.prediction_list = prediction_list
|
||||
|
||||
@ -253,10 +253,15 @@ class PRCodeSuggestions:
|
||||
"""
|
||||
|
||||
suggestion_list = []
|
||||
if not data:
|
||||
return suggestion_list
|
||||
for suggestion in data:
|
||||
suggestion_list.append(suggestion)
|
||||
data_sorted = [[]] * len(suggestion_list)
|
||||
|
||||
if len(suggestion_list ) == 1:
|
||||
return suggestion_list
|
||||
|
||||
try:
|
||||
suggestion_str = ""
|
||||
for i, suggestion in enumerate(suggestion_list):
|
||||
|
Reference in New Issue
Block a user