Merge pull request #1420 from Codium-ai/tr/review_fix

fix: improve line extraction from files with missing content
2025-07-21 04:50:39 +08:00 · 2024-12-27 09:02:41 +02:00
parent 12d603fdb4 4aad67b563
commit 7b2c41e0d2
2 changed files with 63 additions and 42 deletions
--- a/pr_agent/algo/git_patch_processing.py
+++ b/pr_agent/algo/git_patch_processing.py
@ -364,7 +364,7 @@ __old hunk__


 def extract_hunk_lines_from_patch(patch: str, file_name, line_start, line_end, side) -> tuple[str, str]:
-
+    try:
        patch_with_lines_str = f"\n\n## File: '{file_name.strip()}'\n\n"
        selected_lines = ""
        patch_lines = patch.splitlines()
@ -407,5 +407,8 @@ def extract_hunk_lines_from_patch(patch: str, file_name, line_start, line_end, s
                patch_with_lines_str += line + '\n'
                if not line.startswith('-'): # currently we don't support /ask line for deleted lines
                    selected_lines_num += 1
+    except Exception as e:
+        get_logger().error(f"Failed to extract hunk lines from patch: {e}", artifact={"traceback": traceback.format_exc()})
+        return "", ""

    return patch_with_lines_str.rstrip(), selected_lines.rstrip()
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@ -23,6 +23,7 @@ from pydantic import BaseModel
 from starlette_context import context

 from pr_agent.algo import MAX_TOKENS
+from pr_agent.algo.git_patch_processing import extract_hunk_lines_from_patch
 from pr_agent.algo.token_handler import TokenEncoder
 from pr_agent.algo.types import FilePatchInfo
 from pr_agent.config_loader import get_settings, global_settings
@ -272,7 +273,11 @@ def convert_to_markdown_v2(output_data: dict,

    return markdown_text

-def extract_relevant_lines_str(end_line, files, relevant_file, start_line, dedent=False):
+
+def extract_relevant_lines_str(end_line, files, relevant_file, start_line, dedent=False) -> str:
+    """
+    Finds 'relevant_file' in 'files', and returns the lines from 'start_line' to 'end_line' (1-based, inclusive) of its content as a string.
+    """
    try:
        relevant_lines_str = ""
        if files:
@ -280,10 +285,23 @@ def extract_relevant_lines_str(end_line, files, relevant_file, start_line, deden
            for file in files:
                if file.filename.strip() == relevant_file:
                    if not file.head_file:
-                        get_logger().warning(f"No content found in file: {file.filename}")
+                        # as a fallback, extract relevant lines directly from patch
+                        patch = file.patch
+                        get_logger().info(f"No content found in file: '{file.filename}' for 'extract_relevant_lines_str'. Using patch instead")
+                        _, selected_lines = extract_hunk_lines_from_patch(patch, file.filename, start_line, end_line,side='right')
+                        if not selected_lines:
+                            get_logger().error(f"Failed to extract relevant lines from patch: {file.filename}")
                            return ""
+                        # filter out '-' lines
+                        relevant_lines_str = ""
+                        for line in selected_lines.splitlines():
+                            if line.startswith('-'):
+                                continue
+                            relevant_lines_str += line[1:] + '\n'
+                    else:
                        relevant_file_lines = file.head_file.splitlines()
                        relevant_lines_str = "\n".join(relevant_file_lines[start_line - 1:end_line])
+
                    if dedent and relevant_lines_str:
                        # Remove the longest leading string of spaces and tabs common to all lines.
                        relevant_lines_str = textwrap.dedent(relevant_lines_str)