publish each suggestion separately

This commit is contained in:
mrT23
2023-08-22 16:11:51 +03:00
parent b85679e5e4
commit f4f040bf8d
4 changed files with 42 additions and 28 deletions

View File

@ -1,5 +1,4 @@
from __future__ import annotations from __future__ import annotations
import logging import logging
import re import re
@ -157,7 +156,7 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
example output: example output:
## src/file.ts ## src/file.ts
--new hunk-- __new hunk__
881 line1 881 line1
882 line2 882 line2
883 line3 883 line3
@ -166,7 +165,7 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
889 line6 889 line6
890 line7 890 line7
... ...
--old hunk-- __old hunk__
line1 line1
line2 line2
- line3 - line3
@ -177,7 +176,6 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
""" """
patch_with_lines_str = f"\n\n## {file.filename}\n" patch_with_lines_str = f"\n\n## {file.filename}\n"
import re
patch_lines = patch.splitlines() patch_lines = patch.splitlines()
RE_HUNK_HEADER = re.compile( RE_HUNK_HEADER = re.compile(
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)") r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
@ -185,23 +183,30 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
old_content_lines = [] old_content_lines = []
match = None match = None
start1, size1, start2, size2 = -1, -1, -1, -1 start1, size1, start2, size2 = -1, -1, -1, -1
prev_header_line = []
header_line =[]
for line in patch_lines: for line in patch_lines:
if 'no newline at end of file' in line.lower(): if 'no newline at end of file' in line.lower():
continue continue
if line.startswith('@@'): if line.startswith('@@'):
header_line = line
match = RE_HUNK_HEADER.match(line) match = RE_HUNK_HEADER.match(line)
if match and new_content_lines: # found a new hunk, split the previous lines if match and new_content_lines: # found a new hunk, split the previous lines
if new_content_lines: if new_content_lines:
patch_with_lines_str += '\n--new hunk--\n' if prev_header_line:
patch_with_lines_str += f'\n{prev_header_line}\n'
patch_with_lines_str += '__new hunk__\n'
for i, line_new in enumerate(new_content_lines): for i, line_new in enumerate(new_content_lines):
patch_with_lines_str += f"{start2 + i} {line_new}\n" patch_with_lines_str += f"{start2 + i} {line_new}\n"
if old_content_lines: if old_content_lines:
patch_with_lines_str += '--old hunk--\n' patch_with_lines_str += '__old hunk__\n'
for line_old in old_content_lines: for line_old in old_content_lines:
patch_with_lines_str += f"{line_old}\n" patch_with_lines_str += f"{line_old}\n"
new_content_lines = [] new_content_lines = []
old_content_lines = [] old_content_lines = []
if match:
prev_header_line = header_line
try: try:
start1, size1, start2, size2 = map(int, match.groups()[:4]) start1, size1, start2, size2 = map(int, match.groups()[:4])
except: # '@@ -0,0 +1 @@' case except: # '@@ -0,0 +1 @@' case
@ -219,12 +224,13 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
# finishing last hunk # finishing last hunk
if match and new_content_lines: if match and new_content_lines:
if new_content_lines: if new_content_lines:
patch_with_lines_str += '\n--new hunk--\n' patch_with_lines_str += f'\n{header_line}\n'
patch_with_lines_str += '\n__new hunk__\n'
for i, line_new in enumerate(new_content_lines): for i, line_new in enumerate(new_content_lines):
patch_with_lines_str += f"{start2 + i} {line_new}\n" patch_with_lines_str += f"{start2 + i} {line_new}\n"
if old_content_lines: if old_content_lines:
patch_with_lines_str += '\n--old hunk--\n' patch_with_lines_str += '\n__old hunk__\n'
for line_old in old_content_lines: for line_old in old_content_lines:
patch_with_lines_str += f"{line_old}\n" patch_with_lines_str += f"{line_old}\n"
return patch_with_lines_str.strip() return patch_with_lines_str.rstrip()

View File

@ -24,7 +24,7 @@ OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600
PATCH_EXTRA_LINES = 3 PATCH_EXTRA_LINES = 3
def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: str, def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: str,
add_line_numbers_to_hunks: bool = False, disable_extra_lines: bool = False) -> str: add_line_numbers_to_hunks: bool = True, disable_extra_lines: bool = True) -> str:
""" """
Returns a string with the diff of the pull request, applying diff minimization techniques if needed. Returns a string with the diff of the pull request, applying diff minimization techniques if needed.
@ -103,9 +103,9 @@ def pr_generate_extended_diff(pr_languages: list,
# extend each patch with extra lines of context # extend each patch with extra lines of context
extended_patch = extend_patch(original_file_content_str, patch, num_lines=PATCH_EXTRA_LINES) extended_patch = extend_patch(original_file_content_str, patch, num_lines=PATCH_EXTRA_LINES)
full_extended_patch = f"## {file.filename}\n\n{extended_patch}\n" full_extended_patch = f"\n\n## {file.filename}\n\n{extended_patch}\n"
if add_line_numbers_to_hunks and PATCH_EXTRA_LINES > 0: if add_line_numbers_to_hunks:
full_extended_patch = convert_to_hunks_with_lines_numbers(extended_patch, file) full_extended_patch = convert_to_hunks_with_lines_numbers(extended_patch, file)
patch_tokens = token_handler.count_tokens(full_extended_patch) patch_tokens = token_handler.count_tokens(full_extended_patch)
@ -322,7 +322,9 @@ def clip_tokens(text: str, max_tokens: int) -> str:
Returns: Returns:
str: The clipped string. str: The clipped string.
""" """
# We'll estimate the number of tokens by hueristically assuming 2.5 tokens per word if not text:
return text
try: try:
encoder = get_token_encoder() encoder = get_token_encoder()
num_input_tokens = len(encoder.encode(text)) num_input_tokens = len(encoder.encode(text))

View File

@ -6,22 +6,23 @@ Example PR Diff input:
' '
## src/file1.py ## src/file1.py
--new hunk-- @@ -12,3 +12,5 @@ def func1():
__new hunk__
12 code line that already existed in the file... 12 code line that already existed in the file...
13 code line that already existed in the file.... 13 code line that already existed in the file....
14 +new code line added in the PR 14 +new code line added in the PR
15 code line that already existed in the file... 15 code line that already existed in the file...
16 code line that already existed in the file... 16 code line that already existed in the file...
__old hunk__
--old hunk--
code line that already existed in the file... code line that already existed in the file...
-code line that was removed in the PR -code line that was removed in the PR
code line that already existed in the file... code line that already existed in the file...
--new hunk-- @@ ... @@ def func2():
__new hunk__
... ...
--old hunk-- __old hunk__
... ...
@ -31,11 +32,12 @@ Example PR Diff input:
Specific instructions: Specific instructions:
- Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices. - Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices.
- Suggestions should refer only to code from the '--new hunk--' sections, and focus on new lines of code (lines starting with '+'). - Suggestions should refer only to code from the '__new hunk__' sections, and focus on new lines of code (lines starting with '+').
- Provide the exact line number range (inclusive) for each issue. - Provide the exact line number range (inclusive) for each issue.
- Assume there is additional relevant code, that is not included in the diff. - Assume there is additional relevant code, that is not included in the diff.
- Provide up to {{ num_code_suggestions }} code suggestions. - Provide up to {{ num_code_suggestions }} code suggestions.
- Avoid making suggestions that have already been implemented in the PR code. For example, if you propose adding a docstring, type hint, or anything else, make sure it isn't already in the '--new hunk--' code. - Avoid making suggestions that have already been implemented in the PR code. For example, if you propose adding a docstring, type hint, or anything else, make sure it isn't already in the '__new hunk__' code.
- Don't suggest to add docstring or type hints.
{%- if extra_instructions %} {%- if extra_instructions %}
@ -58,19 +60,19 @@ You must use the following JSON schema to format your answer:
}, },
"suggestion content": { "suggestion content": {
"type": "string", "type": "string",
"description": "a concrete suggestion for meaningfully improving the new PR code (lines from the '--new hunk--' sections, starting with '+')." "description": "a concrete suggestion for meaningfully improving the new PR code (lines from the '__new hunk__' sections, starting with '+')."
}, },
"existing code": { "existing code": {
"type": "string", "type": "string",
"description": "a code snippet showing the relevant code lines from a '--new hunk--' section. It must be continuous, correctly formatted and indented, and without line numbers." "description": "a code snippet showing the relevant code lines from a '__new hunk__' section. It must be continuous, correctly formatted and indented, and without line numbers."
}, },
"relevant lines": { "relevant lines": {
"type": "string", "type": "string",
"description": "the relevant lines from a '--new hunk--' section, in the format of 'start_line-end_line'. For example: '10-15'. They should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above." "description": "the relevant lines from a '__new hunk__' section, in the format of 'start_line-end_line'. For example: '10-15'. They should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above."
}, },
"improved code": { "improved code": {
"type": "string", "type": "string",
"description": "a new code snippet that can be used to replace the relevant lines in '--new hunk--' code. Replacement suggestions should be complete, correctly formatted and indented, and without line numbers." "description": "a new code snippet that can be used to replace the relevant lines in '__new hunk__' code. Replacement suggestions should be complete, correctly formatted and indented, and without line numbers."
} }
} }
} }

View File

@ -70,7 +70,7 @@ class PRCodeSuggestions:
if get_settings().config.publish_output: if get_settings().config.publish_output:
logging.info('Pushing PR review...') logging.info('Pushing PR review...')
self.git_provider.remove_initial_comment() self.git_provider.remove_initial_comment()
logging.info('Pushing inline code comments...') logging.info('Pushing inline code suggestions...')
self.push_inline_code_suggestions(data) self.push_inline_code_suggestions(data)
async def _prepare_prediction(self, model: str): async def _prepare_prediction(self, model: str):
@ -138,7 +138,11 @@ class PRCodeSuggestions:
if get_settings().config.verbosity_level >= 2: if get_settings().config.verbosity_level >= 2:
logging.info(f"Could not parse suggestion: {d}") logging.info(f"Could not parse suggestion: {d}")
self.git_provider.publish_code_suggestions(code_suggestions) is_successful = self.git_provider.publish_code_suggestions(code_suggestions)
if not is_successful:
logging.info("Failed to publish code suggestions, trying to publish each suggestion separately")
for code_suggestion in code_suggestions:
self.git_provider.publish_code_suggestions([code_suggestion])
def dedent_code(self, relevant_file, relevant_lines_start, new_code_snippet): def dedent_code(self, relevant_file, relevant_lines_start, new_code_snippet):
try: # dedent code snippet try: # dedent code snippet
@ -229,8 +233,8 @@ class PRCodeSuggestions:
importance_order = s['importance order'] importance_order = s['importance order']
data_sorted[importance_order - 1] = suggestion_list[suggestion_number - 1] data_sorted[importance_order - 1] = suggestion_list[suggestion_number - 1]
if get_settings().pr_extendeted_code_suggestions.final_clip_factor != 1: if get_settings().pr_code_suggestions.final_clip_factor != 1:
new_len = int(0.5 + len(data_sorted) * get_settings().pr_extendeted_code_suggestions.final_clip_factor) new_len = int(0.5 + len(data_sorted) * get_settings().pr_code_suggestions.final_clip_factor)
data_sorted = data_sorted[:new_len] data_sorted = data_sorted[:new_len]
except Exception as e: except Exception as e:
if get_settings().config.verbosity_level >= 1: if get_settings().config.verbosity_level >= 1: