publish each suggestion separately

2025-07-21 04:50:39 +08:00 · 2023-08-22 16:11:51 +03:00
parent b85679e5e4
commit f4f040bf8d
4 changed files with 42 additions and 28 deletions
--- a/pr_agent/algo/git_patch_processing.py
+++ b/pr_agent/algo/git_patch_processing.py
@@ -1,5 +1,4 @@
 from __future__ import annotations
-
 import logging
 import re

@@ -157,7 +156,7 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:

    example output:
 ## src/file.ts
--new hunk--
+__new hunk__
 881        line1
 882        line2
 883        line3
@@ -166,7 +165,7 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
 889        line6
 890        line7
 ...
--old hunk--
+__old hunk__
        line1
        line2
 -       line3
@@ -177,7 +176,6 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
    """
    
    patch_with_lines_str = f"\n\n## {file.filename}\n"
-    import re
    patch_lines = patch.splitlines()
    RE_HUNK_HEADER = re.compile(
        r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
@@ -185,23 +183,30 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
    old_content_lines = []
    match = None
    start1, size1, start2, size2 = -1, -1, -1, -1
+    prev_header_line = []
+    header_line =[]
    for line in patch_lines:
        if 'no newline at end of file' in line.lower():
            continue

        if line.startswith('@@'):
+            header_line = line
            match = RE_HUNK_HEADER.match(line)
            if match and new_content_lines:  # found a new hunk, split the previous lines
                if new_content_lines:
-                    patch_with_lines_str += '\n--new hunk--\n'
+                    if prev_header_line:
+                        patch_with_lines_str += f'\n{prev_header_line}\n'
+                    patch_with_lines_str += '__new hunk__\n'
                    for i, line_new in enumerate(new_content_lines):
                        patch_with_lines_str += f"{start2 + i} {line_new}\n"
                if old_content_lines:
-                    patch_with_lines_str += '--old hunk--\n'
+                    patch_with_lines_str += '__old hunk__\n'
                    for line_old in old_content_lines:
                        patch_with_lines_str += f"{line_old}\n"
                new_content_lines = []
                old_content_lines = []
+            if match:
+                prev_header_line = header_line
            try:
                start1, size1, start2, size2 = map(int, match.groups()[:4])
            except: # '@@ -0,0 +1 @@' case
@@ -219,12 +224,13 @@ def convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
    # finishing last hunk
    if match and new_content_lines:
        if new_content_lines:
-            patch_with_lines_str += '\n--new hunk--\n'
+            patch_with_lines_str += f'\n{header_line}\n'
+            patch_with_lines_str += '\n__new hunk__\n'
            for i, line_new in enumerate(new_content_lines):
                patch_with_lines_str += f"{start2 + i} {line_new}\n"
        if old_content_lines:
-            patch_with_lines_str += '\n--old hunk--\n'
+            patch_with_lines_str += '\n__old hunk__\n'
            for line_old in old_content_lines:
                patch_with_lines_str += f"{line_old}\n"

-    return patch_with_lines_str.strip()
+    return patch_with_lines_str.rstrip()
--- a/pr_agent/algo/pr_processing.py
+++ b/pr_agent/algo/pr_processing.py
@@ -24,7 +24,7 @@ OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600
 PATCH_EXTRA_LINES = 3

 def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: str,
-                add_line_numbers_to_hunks: bool = False, disable_extra_lines: bool = False) -> str:
+                add_line_numbers_to_hunks: bool = True, disable_extra_lines: bool = True) -> str:
    """
    Returns a string with the diff of the pull request, applying diff minimization techniques if needed.

@@ -103,9 +103,9 @@ def pr_generate_extended_diff(pr_languages: list,

            # extend each patch with extra lines of context
            extended_patch = extend_patch(original_file_content_str, patch, num_lines=PATCH_EXTRA_LINES)
-            full_extended_patch = f"## {file.filename}\n\n{extended_patch}\n"
+            full_extended_patch = f"\n\n## {file.filename}\n\n{extended_patch}\n"

-            if add_line_numbers_to_hunks and PATCH_EXTRA_LINES > 0:
+            if add_line_numbers_to_hunks:
                full_extended_patch = convert_to_hunks_with_lines_numbers(extended_patch, file)

            patch_tokens = token_handler.count_tokens(full_extended_patch)
@@ -322,7 +322,9 @@ def clip_tokens(text: str, max_tokens: int) -> str:
    Returns:
        str: The clipped string.
    """
-    # We'll estimate the number of tokens by hueristically assuming 2.5 tokens per word
+    if not text:
+        return text
+
    try:
        encoder = get_token_encoder()
        num_input_tokens = len(encoder.encode(text))