mirror of
https://github.com/qodo-ai/pr-agent.git
synced 2025-07-03 04:10:49 +08:00
publish each suggestion separately
This commit is contained in:
@ -1,5 +1,4 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@ -157,7 +156,7 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
|
|||||||
|
|
||||||
example output:
|
example output:
|
||||||
## src/file.ts
|
## src/file.ts
|
||||||
--new hunk--
|
__new hunk__
|
||||||
881 line1
|
881 line1
|
||||||
882 line2
|
882 line2
|
||||||
883 line3
|
883 line3
|
||||||
@ -166,7 +165,7 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
|
|||||||
889 line6
|
889 line6
|
||||||
890 line7
|
890 line7
|
||||||
...
|
...
|
||||||
--old hunk--
|
__old hunk__
|
||||||
line1
|
line1
|
||||||
line2
|
line2
|
||||||
- line3
|
- line3
|
||||||
@ -177,7 +176,6 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
patch_with_lines_str = f"\n\n## {file.filename}\n"
|
patch_with_lines_str = f"\n\n## {file.filename}\n"
|
||||||
import re
|
|
||||||
patch_lines = patch.splitlines()
|
patch_lines = patch.splitlines()
|
||||||
RE_HUNK_HEADER = re.compile(
|
RE_HUNK_HEADER = re.compile(
|
||||||
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
|
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
|
||||||
@ -185,23 +183,30 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
|
|||||||
old_content_lines = []
|
old_content_lines = []
|
||||||
match = None
|
match = None
|
||||||
start1, size1, start2, size2 = -1, -1, -1, -1
|
start1, size1, start2, size2 = -1, -1, -1, -1
|
||||||
|
prev_header_line = []
|
||||||
|
header_line =[]
|
||||||
for line in patch_lines:
|
for line in patch_lines:
|
||||||
if 'no newline at end of file' in line.lower():
|
if 'no newline at end of file' in line.lower():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if line.startswith('@@'):
|
if line.startswith('@@'):
|
||||||
|
header_line = line
|
||||||
match = RE_HUNK_HEADER.match(line)
|
match = RE_HUNK_HEADER.match(line)
|
||||||
if match and new_content_lines: # found a new hunk, split the previous lines
|
if match and new_content_lines: # found a new hunk, split the previous lines
|
||||||
if new_content_lines:
|
if new_content_lines:
|
||||||
patch_with_lines_str += '\n--new hunk--\n'
|
if prev_header_line:
|
||||||
|
patch_with_lines_str += f'\n{prev_header_line}\n'
|
||||||
|
patch_with_lines_str += '__new hunk__\n'
|
||||||
for i, line_new in enumerate(new_content_lines):
|
for i, line_new in enumerate(new_content_lines):
|
||||||
patch_with_lines_str += f"{start2 + i} {line_new}\n"
|
patch_with_lines_str += f"{start2 + i} {line_new}\n"
|
||||||
if old_content_lines:
|
if old_content_lines:
|
||||||
patch_with_lines_str += '--old hunk--\n'
|
patch_with_lines_str += '__old hunk__\n'
|
||||||
for line_old in old_content_lines:
|
for line_old in old_content_lines:
|
||||||
patch_with_lines_str += f"{line_old}\n"
|
patch_with_lines_str += f"{line_old}\n"
|
||||||
new_content_lines = []
|
new_content_lines = []
|
||||||
old_content_lines = []
|
old_content_lines = []
|
||||||
|
if match:
|
||||||
|
prev_header_line = header_line
|
||||||
try:
|
try:
|
||||||
start1, size1, start2, size2 = map(int, match.groups()[:4])
|
start1, size1, start2, size2 = map(int, match.groups()[:4])
|
||||||
except: # '@@ -0,0 +1 @@' case
|
except: # '@@ -0,0 +1 @@' case
|
||||||
@ -219,12 +224,13 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
|
|||||||
# finishing last hunk
|
# finishing last hunk
|
||||||
if match and new_content_lines:
|
if match and new_content_lines:
|
||||||
if new_content_lines:
|
if new_content_lines:
|
||||||
patch_with_lines_str += '\n--new hunk--\n'
|
patch_with_lines_str += f'\n{header_line}\n'
|
||||||
|
patch_with_lines_str += '\n__new hunk__\n'
|
||||||
for i, line_new in enumerate(new_content_lines):
|
for i, line_new in enumerate(new_content_lines):
|
||||||
patch_with_lines_str += f"{start2 + i} {line_new}\n"
|
patch_with_lines_str += f"{start2 + i} {line_new}\n"
|
||||||
if old_content_lines:
|
if old_content_lines:
|
||||||
patch_with_lines_str += '\n--old hunk--\n'
|
patch_with_lines_str += '\n__old hunk__\n'
|
||||||
for line_old in old_content_lines:
|
for line_old in old_content_lines:
|
||||||
patch_with_lines_str += f"{line_old}\n"
|
patch_with_lines_str += f"{line_old}\n"
|
||||||
|
|
||||||
return patch_with_lines_str.strip()
|
return patch_with_lines_str.rstrip()
|
||||||
|
@ -24,7 +24,7 @@ OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600
|
|||||||
PATCH_EXTRA_LINES = 3
|
PATCH_EXTRA_LINES = 3
|
||||||
|
|
||||||
def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: str,
|
def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: str,
|
||||||
add_line_numbers_to_hunks: bool = False, disable_extra_lines: bool = False) -> str:
|
add_line_numbers_to_hunks: bool = True, disable_extra_lines: bool = True) -> str:
|
||||||
"""
|
"""
|
||||||
Returns a string with the diff of the pull request, applying diff minimization techniques if needed.
|
Returns a string with the diff of the pull request, applying diff minimization techniques if needed.
|
||||||
|
|
||||||
@ -103,9 +103,9 @@ def pr_generate_extended_diff(pr_languages: list,
|
|||||||
|
|
||||||
# extend each patch with extra lines of context
|
# extend each patch with extra lines of context
|
||||||
extended_patch = extend_patch(original_file_content_str, patch, num_lines=PATCH_EXTRA_LINES)
|
extended_patch = extend_patch(original_file_content_str, patch, num_lines=PATCH_EXTRA_LINES)
|
||||||
full_extended_patch = f"## {file.filename}\n\n{extended_patch}\n"
|
full_extended_patch = f"\n\n## {file.filename}\n\n{extended_patch}\n"
|
||||||
|
|
||||||
if add_line_numbers_to_hunks and PATCH_EXTRA_LINES > 0:
|
if add_line_numbers_to_hunks:
|
||||||
full_extended_patch = convert_to_hunks_with_lines_numbers(extended_patch, file)
|
full_extended_patch = convert_to_hunks_with_lines_numbers(extended_patch, file)
|
||||||
|
|
||||||
patch_tokens = token_handler.count_tokens(full_extended_patch)
|
patch_tokens = token_handler.count_tokens(full_extended_patch)
|
||||||
@ -322,7 +322,9 @@ def clip_tokens(text: str, max_tokens: int) -> str:
|
|||||||
Returns:
|
Returns:
|
||||||
str: The clipped string.
|
str: The clipped string.
|
||||||
"""
|
"""
|
||||||
# We'll estimate the number of tokens by hueristically assuming 2.5 tokens per word
|
if not text:
|
||||||
|
return text
|
||||||
|
|
||||||
try:
|
try:
|
||||||
encoder = get_token_encoder()
|
encoder = get_token_encoder()
|
||||||
num_input_tokens = len(encoder.encode(text))
|
num_input_tokens = len(encoder.encode(text))
|
||||||
|
@ -6,22 +6,23 @@ Example PR Diff input:
|
|||||||
'
|
'
|
||||||
## src/file1.py
|
## src/file1.py
|
||||||
|
|
||||||
--new hunk--
|
@@ -12,3 +12,5 @@ def func1():
|
||||||
|
__new hunk__
|
||||||
12 code line that already existed in the file...
|
12 code line that already existed in the file...
|
||||||
13 code line that already existed in the file....
|
13 code line that already existed in the file....
|
||||||
14 +new code line added in the PR
|
14 +new code line added in the PR
|
||||||
15 code line that already existed in the file...
|
15 code line that already existed in the file...
|
||||||
16 code line that already existed in the file...
|
16 code line that already existed in the file...
|
||||||
|
__old hunk__
|
||||||
--old hunk--
|
|
||||||
code line that already existed in the file...
|
code line that already existed in the file...
|
||||||
-code line that was removed in the PR
|
-code line that was removed in the PR
|
||||||
code line that already existed in the file...
|
code line that already existed in the file...
|
||||||
|
|
||||||
|
|
||||||
--new hunk--
|
@@ ... @@ def func2():
|
||||||
|
__new hunk__
|
||||||
...
|
...
|
||||||
--old hunk--
|
__old hunk__
|
||||||
...
|
...
|
||||||
|
|
||||||
|
|
||||||
@ -31,11 +32,12 @@ Example PR Diff input:
|
|||||||
|
|
||||||
Specific instructions:
|
Specific instructions:
|
||||||
- Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices.
|
- Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices.
|
||||||
- Suggestions should refer only to code from the '--new hunk--' sections, and focus on new lines of code (lines starting with '+').
|
- Suggestions should refer only to code from the '__new hunk__' sections, and focus on new lines of code (lines starting with '+').
|
||||||
- Provide the exact line number range (inclusive) for each issue.
|
- Provide the exact line number range (inclusive) for each issue.
|
||||||
- Assume there is additional relevant code, that is not included in the diff.
|
- Assume there is additional relevant code, that is not included in the diff.
|
||||||
- Provide up to {{ num_code_suggestions }} code suggestions.
|
- Provide up to {{ num_code_suggestions }} code suggestions.
|
||||||
- Avoid making suggestions that have already been implemented in the PR code. For example, if you propose adding a docstring, type hint, or anything else, make sure it isn't already in the '--new hunk--' code.
|
- Avoid making suggestions that have already been implemented in the PR code. For example, if you propose adding a docstring, type hint, or anything else, make sure it isn't already in the '__new hunk__' code.
|
||||||
|
- Don't suggest to add docstring or type hints.
|
||||||
|
|
||||||
{%- if extra_instructions %}
|
{%- if extra_instructions %}
|
||||||
|
|
||||||
@ -58,19 +60,19 @@ You must use the following JSON schema to format your answer:
|
|||||||
},
|
},
|
||||||
"suggestion content": {
|
"suggestion content": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "a concrete suggestion for meaningfully improving the new PR code (lines from the '--new hunk--' sections, starting with '+')."
|
"description": "a concrete suggestion for meaningfully improving the new PR code (lines from the '__new hunk__' sections, starting with '+')."
|
||||||
},
|
},
|
||||||
"existing code": {
|
"existing code": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "a code snippet showing the relevant code lines from a '--new hunk--' section. It must be continuous, correctly formatted and indented, and without line numbers."
|
"description": "a code snippet showing the relevant code lines from a '__new hunk__' section. It must be continuous, correctly formatted and indented, and without line numbers."
|
||||||
},
|
},
|
||||||
"relevant lines": {
|
"relevant lines": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "the relevant lines from a '--new hunk--' section, in the format of 'start_line-end_line'. For example: '10-15'. They should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above."
|
"description": "the relevant lines from a '__new hunk__' section, in the format of 'start_line-end_line'. For example: '10-15'. They should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above."
|
||||||
},
|
},
|
||||||
"improved code": {
|
"improved code": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "a new code snippet that can be used to replace the relevant lines in '--new hunk--' code. Replacement suggestions should be complete, correctly formatted and indented, and without line numbers."
|
"description": "a new code snippet that can be used to replace the relevant lines in '__new hunk__' code. Replacement suggestions should be complete, correctly formatted and indented, and without line numbers."
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -70,7 +70,7 @@ class PRCodeSuggestions:
|
|||||||
if get_settings().config.publish_output:
|
if get_settings().config.publish_output:
|
||||||
logging.info('Pushing PR review...')
|
logging.info('Pushing PR review...')
|
||||||
self.git_provider.remove_initial_comment()
|
self.git_provider.remove_initial_comment()
|
||||||
logging.info('Pushing inline code comments...')
|
logging.info('Pushing inline code suggestions...')
|
||||||
self.push_inline_code_suggestions(data)
|
self.push_inline_code_suggestions(data)
|
||||||
|
|
||||||
async def _prepare_prediction(self, model: str):
|
async def _prepare_prediction(self, model: str):
|
||||||
@ -138,7 +138,11 @@ class PRCodeSuggestions:
|
|||||||
if get_settings().config.verbosity_level >= 2:
|
if get_settings().config.verbosity_level >= 2:
|
||||||
logging.info(f"Could not parse suggestion: {d}")
|
logging.info(f"Could not parse suggestion: {d}")
|
||||||
|
|
||||||
self.git_provider.publish_code_suggestions(code_suggestions)
|
is_successful = self.git_provider.publish_code_suggestions(code_suggestions)
|
||||||
|
if not is_successful:
|
||||||
|
logging.info("Failed to publish code suggestions, trying to publish each suggestion separately")
|
||||||
|
for code_suggestion in code_suggestions:
|
||||||
|
self.git_provider.publish_code_suggestions([code_suggestion])
|
||||||
|
|
||||||
def dedent_code(self, relevant_file, relevant_lines_start, new_code_snippet):
|
def dedent_code(self, relevant_file, relevant_lines_start, new_code_snippet):
|
||||||
try: # dedent code snippet
|
try: # dedent code snippet
|
||||||
@ -229,8 +233,8 @@ class PRCodeSuggestions:
|
|||||||
importance_order = s['importance order']
|
importance_order = s['importance order']
|
||||||
data_sorted[importance_order - 1] = suggestion_list[suggestion_number - 1]
|
data_sorted[importance_order - 1] = suggestion_list[suggestion_number - 1]
|
||||||
|
|
||||||
if get_settings().pr_extendeted_code_suggestions.final_clip_factor != 1:
|
if get_settings().pr_code_suggestions.final_clip_factor != 1:
|
||||||
new_len = int(0.5 + len(data_sorted) * get_settings().pr_extendeted_code_suggestions.final_clip_factor)
|
new_len = int(0.5 + len(data_sorted) * get_settings().pr_code_suggestions.final_clip_factor)
|
||||||
data_sorted = data_sorted[:new_len]
|
data_sorted = data_sorted[:new_len]
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if get_settings().config.verbosity_level >= 1:
|
if get_settings().config.verbosity_level >= 1:
|
||||||
|
Reference in New Issue
Block a user