From f4f040bf8d8443224f083d4cb544c206de9767fc Mon Sep 17 00:00:00 2001
From: mrT23 <tal.r@codium.ai>
Date: Tue, 22 Aug 2023 16:11:51 +0300
Subject: [PATCH] publish each suggestion separably

---
 pr_agent/algo/git_patch_processing.py         | 24 ++++++++++++-------
 pr_agent/algo/pr_processing.py                | 10 ++++----
 .../settings/pr_code_suggestions_prompts.toml | 24 ++++++++++---------
 pr_agent/tools/pr_code_suggestions.py         | 12 ++++++----
 4 files changed, 42 insertions(+), 28 deletions(-)

diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py
index 230df41e..1a2bd22b 100644
--- a/pr_agent/algo/git_patch_processing.py
+++ b/pr_agent/algo/git_patch_processing.py
@@ -1,5 +1,4 @@
 from __future__ import annotations
-
 import logging
 import re
 
@@ -157,7 +156,7 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
 
     example output:
 ## src/file.ts
---new hunk--
+__new hunk__
 881        line1
 882        line2
 883        line3
@@ -166,7 +165,7 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
 889        line6
 890        line7
 ...
---old hunk--
+__old hunk__
         line1
         line2
 -       line3
@@ -177,7 +176,6 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
     """
     
     patch_with_lines_str = f"\n\n## {file.filename}\n"
-    import re
     patch_lines = patch.splitlines()
     RE_HUNK_HEADER = re.compile(
         r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
@@ -185,23 +183,30 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
     old_content_lines = []
     match = None
     start1, size1, start2, size2 = -1, -1, -1, -1
+    prev_header_line = []
+    header_line =[]
     for line in patch_lines:
         if 'no newline at end of file' in line.lower():
             continue
 
         if line.startswith('@@'):
+            header_line = line
             match = RE_HUNK_HEADER.match(line)
             if match and new_content_lines:  # found a new hunk, split the previous lines
                 if new_content_lines:
-                    patch_with_lines_str += '\n--new hunk--\n'
+                    if prev_header_line:
+                        patch_with_lines_str += f'\n{prev_header_line}\n'
+                    patch_with_lines_str += '__new hunk__\n'
                     for i, line_new in enumerate(new_content_lines):
                         patch_with_lines_str += f"{start2 + i} {line_new}\n"
                 if old_content_lines:
-                    patch_with_lines_str += '--old hunk--\n'
+                    patch_with_lines_str += '__old hunk__\n'
                     for line_old in old_content_lines:
                         patch_with_lines_str += f"{line_old}\n"
                 new_content_lines = []
                 old_content_lines = []
+            if match:
+                prev_header_line = header_line
             try:
                 start1, size1, start2, size2 = map(int, match.groups()[:4])
             except: # '@@ -0,0 +1 @@' case
@@ -219,12 +224,13 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
     # finishing last hunk
     if match and new_content_lines:
         if new_content_lines:
-            patch_with_lines_str += '\n--new hunk--\n'
+            patch_with_lines_str += f'\n{header_line}\n'
+            patch_with_lines_str += '\n__new hunk__\n'
             for i, line_new in enumerate(new_content_lines):
                 patch_with_lines_str += f"{start2 + i} {line_new}\n"
         if old_content_lines:
-            patch_with_lines_str += '\n--old hunk--\n'
+            patch_with_lines_str += '\n__old hunk__\n'
             for line_old in old_content_lines:
                 patch_with_lines_str += f"{line_old}\n"
 
-    return patch_with_lines_str.strip()
+    return patch_with_lines_str.rstrip()
diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py
index 1003f456..29709d29 100644
--- a/pr_agent/algo/pr_processing.py
+++ b/pr_agent/algo/pr_processing.py
@@ -24,7 +24,7 @@ OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600
 PATCH_EXTRA_LINES = 3
 
 def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: str,
-                add_line_numbers_to_hunks: bool = False, disable_extra_lines: bool = False) -> str:
+                add_line_numbers_to_hunks: bool = True, disable_extra_lines: bool = True) -> str:
     """
     Returns a string with the diff of the pull request, applying diff minimization techniques if needed.
 
@@ -103,9 +103,9 @@ def pr_generate_extended_diff(pr_languages: list,
 
             # extend each patch with extra lines of context
             extended_patch = extend_patch(original_file_content_str, patch, num_lines=PATCH_EXTRA_LINES)
-            full_extended_patch = f"## {file.filename}\n\n{extended_patch}\n"
+            full_extended_patch = f"\n\n## {file.filename}\n\n{extended_patch}\n"
 
-            if add_line_numbers_to_hunks and PATCH_EXTRA_LINES > 0:
+            if add_line_numbers_to_hunks:
                 full_extended_patch = convert_to_hunks_with_lines_numbers(extended_patch, file)
 
             patch_tokens = token_handler.count_tokens(full_extended_patch)
@@ -322,7 +322,9 @@ def clip_tokens(text: str, max_tokens: int) -> str:
     Returns:
         str: The clipped string.
     """
-    # We'll estimate the number of tokens by hueristically assuming 2.5 tokens per word
+    if not text:
+        return text
+
     try:
         encoder = get_token_encoder()
         num_input_tokens = len(encoder.encode(text))
diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml
index be90c840..e32ffd72 100644
--- a/pr_agent/settings/pr_code_suggestions_prompts.toml
+++ b/pr_agent/settings/pr_code_suggestions_prompts.toml
@@ -6,22 +6,23 @@ Example PR Diff input:
 '
 ## src/file1.py
 
---new hunk--
+@@ -12,3 +12,5 @@ def func1():
+__new hunk__
 12  code line that already existed in the file...
 13  code line that already existed in the file....
 14 +new code line added in the PR
 15  code line that already existed in the file...
 16  code line that already existed in the file...
-
---old hunk--
+__old hunk__
  code line that already existed in the file...
 -code line that was removed in the PR
  code line that already existed in the file...
 
 
---new hunk--
+@@ ... @@ def func2():
+__new hunk__
 ...
---old hunk--
+__old hunk__
 ...
 
 
@@ -31,11 +32,12 @@ Example PR Diff input:
 
 Specific instructions:
 - Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices.
-- Suggestions should refer only to code from the '--new hunk--' sections, and focus on new lines of code (lines starting with '+').
+- Suggestions should refer only to code from the '__new hunk__' sections, and focus on new lines of code (lines starting with '+').
 - Provide the exact line number range (inclusive) for each issue.
 - Assume there is additional relevant code, that is not included in the diff.
 - Provide up to {{ num_code_suggestions }} code suggestions.
-- Avoid making suggestions that have already been implemented in the PR code. For example, if you propose adding a docstring, type hint, or anything else, make sure it isn't already in the '--new hunk--' code.
+- Avoid making suggestions that have already been implemented in the PR code. For example, if you propose adding a docstring, type hint, or anything else, make sure it isn't already in the '__new hunk__' code.
+- Don't suggest to add docstring or type hints.
 
 {%- if extra_instructions %}
 
@@ -58,19 +60,19 @@ You must use the following JSON schema to format your answer:
         },
         "suggestion content": {
           "type": "string",
-          "description": "a concrete suggestion for meaningfully improving the new PR code (lines from the '--new hunk--' sections, starting with '+')."
+          "description": "a concrete suggestion for meaningfully improving the new PR code (lines from the '__new hunk__' sections, starting with '+')."
         },
         "existing code": {
           "type": "string",
-          "description": "a code snippet showing the relevant code lines from a '--new hunk--' section. It must be continuous, correctly formatted and indented, and without line numbers."
+          "description": "a code snippet showing the relevant code lines from a '__new hunk__' section. It must be continuous, correctly formatted and indented, and without line numbers."
         },
         "relevant lines": {
           "type": "string",
-          "description": "the relevant lines from a '--new hunk--' section, in the format of 'start_line-end_line'. For example: '10-15'. They should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above."
+          "description": "the relevant lines from a '__new hunk__' section, in the format of 'start_line-end_line'. For example: '10-15'. They should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above."
         },
         "improved code": {
           "type": "string",
-          "description": "a new code snippet that can be used to replace the relevant lines in '--new hunk--' code. Replacement suggestions should be complete, correctly formatted and indented, and without line numbers."
+          "description": "a new code snippet that can be used to replace the relevant lines in '__new hunk__' code. Replacement suggestions should be complete, correctly formatted and indented, and without line numbers."
         }
       }
     }
diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py
index 4dc2f400..cc787f5e 100644
--- a/pr_agent/tools/pr_code_suggestions.py
+++ b/pr_agent/tools/pr_code_suggestions.py
@@ -70,7 +70,7 @@ class PRCodeSuggestions:
         if get_settings().config.publish_output:
             logging.info('Pushing PR review...')
             self.git_provider.remove_initial_comment()
-            logging.info('Pushing inline code comments...')
+            logging.info('Pushing inline code suggestions...')
             self.push_inline_code_suggestions(data)
 
     async def _prepare_prediction(self, model: str):
@@ -138,7 +138,11 @@ class PRCodeSuggestions:
                 if get_settings().config.verbosity_level >= 2:
                     logging.info(f"Could not parse suggestion: {d}")
 
-        self.git_provider.publish_code_suggestions(code_suggestions)
+        is_successful = self.git_provider.publish_code_suggestions(code_suggestions)
+        if not is_successful:
+            logging.info("Failed to publish code suggestions, trying to publish each suggestion separately")
+            for code_suggestion in code_suggestions:
+                self.git_provider.publish_code_suggestions([code_suggestion])
 
     def dedent_code(self, relevant_file, relevant_lines_start, new_code_snippet):
         try:  # dedent code snippet
@@ -229,8 +233,8 @@ class PRCodeSuggestions:
                 importance_order = s['importance order']
                 data_sorted[importance_order - 1] = suggestion_list[suggestion_number - 1]
 
-            if get_settings().pr_extendeted_code_suggestions.final_clip_factor != 1:
-                new_len = int(0.5 + len(data_sorted) * get_settings().pr_extendeted_code_suggestions.final_clip_factor)
+            if get_settings().pr_code_suggestions.final_clip_factor != 1:
+                new_len = int(0.5 + len(data_sorted) * get_settings().pr_code_suggestions.final_clip_factor)
                 data_sorted = data_sorted[:new_len]
         except Exception as e:
             if get_settings().config.verbosity_level >= 1: