From f4f040bf8d8443224f083d4cb544c206de9767fc Mon Sep 17 00:00:00 2001 From: mrT23 Date: Tue, 22 Aug 2023 16:11:51 +0300 Subject: [PATCH] publish each suggestion separably --- pr_agent/algo/git_patch_processing.py | 24 ++++++++++++------- pr_agent/algo/pr_processing.py | 10 ++++---- .../settings/pr_code_suggestions_prompts.toml | 24 ++++++++++--------- pr_agent/tools/pr_code_suggestions.py | 12 ++++++---- 4 files changed, 42 insertions(+), 28 deletions(-) diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py index 230df41e..1a2bd22b 100644 --- a/pr_agent/algo/git_patch_processing.py +++ b/pr_agent/algo/git_patch_processing.py @@ -1,5 +1,4 @@ from __future__ import annotations - import logging import re @@ -157,7 +156,7 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str: example output: ## src/file.ts ---new hunk-- +__new hunk__ 881 line1 882 line2 883 line3 @@ -166,7 +165,7 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str: 889 line6 890 line7 ... ---old hunk-- +__old hunk__ line1 line2 - line3 @@ -177,7 +176,6 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str: """ patch_with_lines_str = f"\n\n## {file.filename}\n" - import re patch_lines = patch.splitlines() RE_HUNK_HEADER = re.compile( r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)") @@ -185,23 +183,30 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str: old_content_lines = [] match = None start1, size1, start2, size2 = -1, -1, -1, -1 + prev_header_line = [] + header_line =[] for line in patch_lines: if 'no newline at end of file' in line.lower(): continue if line.startswith('@@'): + header_line = line match = RE_HUNK_HEADER.match(line) if match and new_content_lines: # found a new hunk, split the previous lines if new_content_lines: - patch_with_lines_str += '\n--new hunk--\n' + if prev_header_line: + patch_with_lines_str += f'\n{prev_header_line}\n' + patch_with_lines_str += '__new hunk__\n' for i, line_new in enumerate(new_content_lines): patch_with_lines_str += f"{start2 + i} {line_new}\n" if old_content_lines: - patch_with_lines_str += '--old hunk--\n' + patch_with_lines_str += '__old hunk__\n' for line_old in old_content_lines: patch_with_lines_str += f"{line_old}\n" new_content_lines = [] old_content_lines = [] + if match: + prev_header_line = header_line try: start1, size1, start2, size2 = map(int, match.groups()[:4]) except: # '@@ -0,0 +1 @@' case @@ -219,12 +224,13 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str: # finishing last hunk if match and new_content_lines: if new_content_lines: - patch_with_lines_str += '\n--new hunk--\n' + patch_with_lines_str += f'\n{header_line}\n' + patch_with_lines_str += '\n__new hunk__\n' for i, line_new in enumerate(new_content_lines): patch_with_lines_str += f"{start2 + i} {line_new}\n" if old_content_lines: - patch_with_lines_str += '\n--old hunk--\n' + patch_with_lines_str += '\n__old hunk__\n' for line_old in old_content_lines: patch_with_lines_str += f"{line_old}\n" - return patch_with_lines_str.strip() + return patch_with_lines_str.rstrip() diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 1003f456..29709d29 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -24,7 +24,7 @@ OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600 PATCH_EXTRA_LINES = 3 def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: str, - add_line_numbers_to_hunks: bool = False, disable_extra_lines: bool = False) -> str: + add_line_numbers_to_hunks: bool = True, disable_extra_lines: bool = True) -> str: """ Returns a string with the diff of the pull request, applying diff minimization techniques if needed. @@ -103,9 +103,9 @@ def pr_generate_extended_diff(pr_languages: list, # extend each patch with extra lines of context extended_patch = extend_patch(original_file_content_str, patch, num_lines=PATCH_EXTRA_LINES) - full_extended_patch = f"## {file.filename}\n\n{extended_patch}\n" + full_extended_patch = f"\n\n## {file.filename}\n\n{extended_patch}\n" - if add_line_numbers_to_hunks and PATCH_EXTRA_LINES > 0: + if add_line_numbers_to_hunks: full_extended_patch = convert_to_hunks_with_lines_numbers(extended_patch, file) patch_tokens = token_handler.count_tokens(full_extended_patch) @@ -322,7 +322,9 @@ def clip_tokens(text: str, max_tokens: int) -> str: Returns: str: The clipped string. """ - # We'll estimate the number of tokens by hueristically assuming 2.5 tokens per word + if not text: + return text + try: encoder = get_token_encoder() num_input_tokens = len(encoder.encode(text)) diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml index be90c840..e32ffd72 100644 --- a/pr_agent/settings/pr_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_code_suggestions_prompts.toml @@ -6,22 +6,23 @@ Example PR Diff input: ' ## src/file1.py ---new hunk-- +@@ -12,3 +12,5 @@ def func1(): +__new hunk__ 12 code line that already existed in the file... 13 code line that already existed in the file.... 14 +new code line added in the PR 15 code line that already existed in the file... 16 code line that already existed in the file... - ---old hunk-- +__old hunk__ code line that already existed in the file... -code line that was removed in the PR code line that already existed in the file... ---new hunk-- +@@ ... @@ def func2(): +__new hunk__ ... ---old hunk-- +__old hunk__ ... @@ -31,11 +32,12 @@ Example PR Diff input: Specific instructions: - Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices. -- Suggestions should refer only to code from the '--new hunk--' sections, and focus on new lines of code (lines starting with '+'). +- Suggestions should refer only to code from the '__new hunk__' sections, and focus on new lines of code (lines starting with '+'). - Provide the exact line number range (inclusive) for each issue. - Assume there is additional relevant code, that is not included in the diff. - Provide up to {{ num_code_suggestions }} code suggestions. -- Avoid making suggestions that have already been implemented in the PR code. For example, if you propose adding a docstring, type hint, or anything else, make sure it isn't already in the '--new hunk--' code. +- Avoid making suggestions that have already been implemented in the PR code. For example, if you propose adding a docstring, type hint, or anything else, make sure it isn't already in the '__new hunk__' code. +- Don't suggest to add docstring or type hints. {%- if extra_instructions %} @@ -58,19 +60,19 @@ You must use the following JSON schema to format your answer: }, "suggestion content": { "type": "string", - "description": "a concrete suggestion for meaningfully improving the new PR code (lines from the '--new hunk--' sections, starting with '+')." + "description": "a concrete suggestion for meaningfully improving the new PR code (lines from the '__new hunk__' sections, starting with '+')." }, "existing code": { "type": "string", - "description": "a code snippet showing the relevant code lines from a '--new hunk--' section. It must be continuous, correctly formatted and indented, and without line numbers." + "description": "a code snippet showing the relevant code lines from a '__new hunk__' section. It must be continuous, correctly formatted and indented, and without line numbers." }, "relevant lines": { "type": "string", - "description": "the relevant lines from a '--new hunk--' section, in the format of 'start_line-end_line'. For example: '10-15'. They should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above." + "description": "the relevant lines from a '__new hunk__' section, in the format of 'start_line-end_line'. For example: '10-15'. They should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above." }, "improved code": { "type": "string", - "description": "a new code snippet that can be used to replace the relevant lines in '--new hunk--' code. Replacement suggestions should be complete, correctly formatted and indented, and without line numbers." + "description": "a new code snippet that can be used to replace the relevant lines in '__new hunk__' code. Replacement suggestions should be complete, correctly formatted and indented, and without line numbers." } } } diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py index 4dc2f400..cc787f5e 100644 --- a/pr_agent/tools/pr_code_suggestions.py +++ b/pr_agent/tools/pr_code_suggestions.py @@ -70,7 +70,7 @@ class PRCodeSuggestions: if get_settings().config.publish_output: logging.info('Pushing PR review...') self.git_provider.remove_initial_comment() - logging.info('Pushing inline code comments...') + logging.info('Pushing inline code suggestions...') self.push_inline_code_suggestions(data) async def _prepare_prediction(self, model: str): @@ -138,7 +138,11 @@ class PRCodeSuggestions: if get_settings().config.verbosity_level >= 2: logging.info(f"Could not parse suggestion: {d}") - self.git_provider.publish_code_suggestions(code_suggestions) + is_successful = self.git_provider.publish_code_suggestions(code_suggestions) + if not is_successful: + logging.info("Failed to publish code suggestions, trying to publish each suggestion separately") + for code_suggestion in code_suggestions: + self.git_provider.publish_code_suggestions([code_suggestion]) def dedent_code(self, relevant_file, relevant_lines_start, new_code_snippet): try: # dedent code snippet @@ -229,8 +233,8 @@ class PRCodeSuggestions: importance_order = s['importance order'] data_sorted[importance_order - 1] = suggestion_list[suggestion_number - 1] - if get_settings().pr_extendeted_code_suggestions.final_clip_factor != 1: - new_len = int(0.5 + len(data_sorted) * get_settings().pr_extendeted_code_suggestions.final_clip_factor) + if get_settings().pr_code_suggestions.final_clip_factor != 1: + new_len = int(0.5 + len(data_sorted) * get_settings().pr_code_suggestions.final_clip_factor) data_sorted = data_sorted[:new_len] except Exception as e: if get_settings().config.verbosity_level >= 1: