publish each suggestion separately

2025-07-21 04:50:39 +08:00 · 2023-08-22 16:11:51 +03:00
parent b85679e5e4
commit f4f040bf8d
4 changed files with 42 additions and 28 deletions
--- a/pr_agent/algo/git_patch_processing.py
+++ b/pr_agent/algo/git_patch_processing.py
@ -1,5 +1,4 @@
 from __future__ import annotations
 import logging
 import re
@ -157,7 +156,7 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
    example output:
 ## src/file.ts
--new hunk--
+__new hunk__
 881        line1
 882        line2
 883        line3
@ -166,7 +165,7 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
 889        line6
 890        line7
 ...
--old hunk--
+__old hunk__
        line1
        line2
 -       line3
@ -177,7 +176,6 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
    """
    patch_with_lines_str = f"\n\n## {file.filename}\n"
    import re
    patch_lines = patch.splitlines()
    RE_HUNK_HEADER = re.compile(
        r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
@ -185,23 +183,30 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
    old_content_lines = []
    match = None
    start1, size1, start2, size2 = -1, -1, -1, -1
    prev_header_line = []
    header_line =[]
    for line in patch_lines:
        if 'no newline at end of file' in line.lower():
            continue
        if line.startswith('@@'):
            header_line = line
            match = RE_HUNK_HEADER.match(line)
            if match and new_content_lines:  # found a new hunk, split the previous lines
                if new_content_lines:
-                    patch_with_lines_str += '\n--new hunk--\n'
+                    if prev_header_line:
                        patch_with_lines_str += f'\n{prev_header_line}\n'
                    patch_with_lines_str += '__new hunk__\n'
                    for i, line_new in enumerate(new_content_lines):
                        patch_with_lines_str += f"{start2 + i} {line_new}\n"
                if old_content_lines:
-                    patch_with_lines_str += '--old hunk--\n'
+                    patch_with_lines_str += '__old hunk__\n'
                    for line_old in old_content_lines:
                        patch_with_lines_str += f"{line_old}\n"
                new_content_lines = []
                old_content_lines = []
            if match:
                prev_header_line = header_line
            try:
                start1, size1, start2, size2 = map(int, match.groups()[:4])
            except: # '@@ -0,0 +1 @@' case
@ -219,12 +224,13 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
    # finishing last hunk
    if match and new_content_lines:
        if new_content_lines:
-            patch_with_lines_str += '\n--new hunk--\n'
+            patch_with_lines_str += f'\n{header_line}\n'
            patch_with_lines_str += '\n__new hunk__\n'
            for i, line_new in enumerate(new_content_lines):
                patch_with_lines_str += f"{start2 + i} {line_new}\n"
        if old_content_lines:
-            patch_with_lines_str += '\n--old hunk--\n'
+            patch_with_lines_str += '\n__old hunk__\n'
            for line_old in old_content_lines:
                patch_with_lines_str += f"{line_old}\n"
-    return patch_with_lines_str.strip()
+    return patch_with_lines_str.rstrip()
--- a/pr_agent/algo/pr_processing.py
+++ b/pr_agent/algo/pr_processing.py
@ -24,7 +24,7 @@ OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600
 PATCH_EXTRA_LINES = 3
 def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: str,
-                add_line_numbers_to_hunks: bool = False, disable_extra_lines: bool = False) -> str:
+                add_line_numbers_to_hunks: bool = True, disable_extra_lines: bool = True) -> str:
    """
    Returns a string with the diff of the pull request, applying diff minimization techniques if needed.
@ -103,9 +103,9 @@ def pr_generate_extended_diff(pr_languages: list,
            # extend each patch with extra lines of context
            extended_patch = extend_patch(original_file_content_str, patch, num_lines=PATCH_EXTRA_LINES)
-            full_extended_patch = f"## {file.filename}\n\n{extended_patch}\n"
+            full_extended_patch = f"\n\n## {file.filename}\n\n{extended_patch}\n"
-            if add_line_numbers_to_hunks and PATCH_EXTRA_LINES > 0:
+            if add_line_numbers_to_hunks:
                full_extended_patch = convert_to_hunks_with_lines_numbers(extended_patch, file)
            patch_tokens = token_handler.count_tokens(full_extended_patch)
@ -322,7 +322,9 @@ def clip_tokens(text: str, max_tokens: int) -> str:
    Returns:
        str: The clipped string.
    """
-    # We'll estimate the number of tokens by hueristically assuming 2.5 tokens per word
+    if not text:
        return text
    try:
        encoder = get_token_encoder()
        num_input_tokens = len(encoder.encode(text))
--- a/pr_agent/settings/pr_code_suggestions_prompts.toml
+++ b/pr_agent/settings/pr_code_suggestions_prompts.toml
@ -6,22 +6,23 @@ Example PR Diff input:
 '
 ## src/file1.py
--new hunk--
+@@ -12,3 +12,5 @@ def func1():
 __new hunk__
 12  code line that already existed in the file...
 13  code line that already existed in the file....
 14 +new code line added in the PR
 15  code line that already existed in the file...
 16  code line that already existed in the file...
-
+__old hunk__
 --old hunk--
 code line that already existed in the file...
 -code line that was removed in the PR
 code line that already existed in the file...
--new hunk--
+@@ ... @@ def func2():
 __new hunk__
 ...
--old hunk--
+__old hunk__
 ...
@ -31,11 +32,12 @@ Example PR Diff input:
 Specific instructions:
 - Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices.
- Suggestions should refer only to code from the '--new hunk--' sections, and focus on new lines of code (lines starting with '+').
+- Suggestions should refer only to code from the '__new hunk__' sections, and focus on new lines of code (lines starting with '+').
 - Provide the exact line number range (inclusive) for each issue.
 - Assume there is additional relevant code, that is not included in the diff.
 - Provide up to {{ num_code_suggestions }} code suggestions.
- Avoid making suggestions that have already been implemented in the PR code. For example, if you propose adding a docstring, type hint, or anything else, make sure it isn't already in the '--new hunk--' code.
+- Avoid making suggestions that have already been implemented in the PR code. For example, if you propose adding a docstring, type hint, or anything else, make sure it isn't already in the '__new hunk__' code.
 - Don't suggest to add docstring or type hints.
 {%- if extra_instructions %}
@ -58,19 +60,19 @@ You must use the following JSON schema to format your answer:
        },
        "suggestion content": {
          "type": "string",
-          "description": "a concrete suggestion for meaningfully improving the new PR code (lines from the '--new hunk--' sections, starting with '+')."
+          "description": "a concrete suggestion for meaningfully improving the new PR code (lines from the '__new hunk__' sections, starting with '+')."
        },
        "existing code": {
          "type": "string",
-          "description": "a code snippet showing the relevant code lines from a '--new hunk--' section. It must be continuous, correctly formatted and indented, and without line numbers."
+          "description": "a code snippet showing the relevant code lines from a '__new hunk__' section. It must be continuous, correctly formatted and indented, and without line numbers."
        },
        "relevant lines": {
          "type": "string",
-          "description": "the relevant lines from a '--new hunk--' section, in the format of 'start_line-end_line'. For example: '10-15'. They should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above."
+          "description": "the relevant lines from a '__new hunk__' section, in the format of 'start_line-end_line'. For example: '10-15'. They should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above."
        },
        "improved code": {
          "type": "string",
-          "description": "a new code snippet that can be used to replace the relevant lines in '--new hunk--' code. Replacement suggestions should be complete, correctly formatted and indented, and without line numbers."
+          "description": "a new code snippet that can be used to replace the relevant lines in '__new hunk__' code. Replacement suggestions should be complete, correctly formatted and indented, and without line numbers."
        }
      }
    }
--- a/pr_agent/tools/pr_code_suggestions.py
+++ b/pr_agent/tools/pr_code_suggestions.py
@ -70,7 +70,7 @@ class PRCodeSuggestions:
        if get_settings().config.publish_output:
            logging.info('Pushing PR review...')
            self.git_provider.remove_initial_comment()
-            logging.info('Pushing inline code comments...')
+            logging.info('Pushing inline code suggestions...')
            self.push_inline_code_suggestions(data)
    async def _prepare_prediction(self, model: str):
@ -138,7 +138,11 @@ class PRCodeSuggestions:
                if get_settings().config.verbosity_level >= 2:
                    logging.info(f"Could not parse suggestion: {d}")
-        self.git_provider.publish_code_suggestions(code_suggestions)
+        is_successful = self.git_provider.publish_code_suggestions(code_suggestions)
        if not is_successful:
            logging.info("Failed to publish code suggestions, trying to publish each suggestion separately")
            for code_suggestion in code_suggestions:
                self.git_provider.publish_code_suggestions([code_suggestion])
    def dedent_code(self, relevant_file, relevant_lines_start, new_code_snippet):
        try:  # dedent code snippet
@ -229,8 +233,8 @@ class PRCodeSuggestions:
                importance_order = s['importance order']
                data_sorted[importance_order - 1] = suggestion_list[suggestion_number - 1]
-            if get_settings().pr_extendeted_code_suggestions.final_clip_factor != 1:
+            if get_settings().pr_code_suggestions.final_clip_factor != 1:
-                new_len = int(0.5 + len(data_sorted) * get_settings().pr_extendeted_code_suggestions.final_clip_factor)
+                new_len = int(0.5 + len(data_sorted) * get_settings().pr_code_suggestions.final_clip_factor)
                data_sorted = data_sorted[:new_len]
        except Exception as e:
            if get_settings().config.verbosity_level >= 1: