fix: improve line extraction from files with missing content
@@ -364,7 +364,7 @@ __old hunk__


 def extract_hunk_lines_from_patch(patch: str, file_name, line_start, line_end, side) -> tuple[str, str]:
+    try:
         patch_with_lines_str = f"\n\n## File: '{file_name.strip()}'\n\n"
         selected_lines = ""
         patch_lines = patch.splitlines()
@@ -407,5 +407,8 @@ def extract_hunk_lines_from_patch(patch: str, file_name, line_start, line_end, s
                 patch_with_lines_str += line + '\n'
                 if not line.startswith('-'): # currently we don't support /ask line for deleted lines
                     selected_lines_num += 1
+    except Exception as e:
+        get_logger().error(f"Failed to extract hunk lines from patch: {e}", artifact={"traceback": traceback.format_exc()})
+        return "", ""

     return patch_with_lines_str.rstrip(), selected_lines.rstrip()
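Taken together, the two hunks above wrap the body of extract_hunk_lines_from_patch in a try/except, so a malformed or missing patch now produces an error log and an empty ("", "") result instead of an exception propagating to the caller. A minimal sketch of the resulting control flow, with the hunk-parsing loop abridged and a plain logging logger standing in for pr_agent's get_logger (which in the real code also receives the traceback via its artifact argument):

import logging
import traceback

logger = logging.getLogger(__name__)  # stand-in for pr_agent's get_logger()

def extract_hunk_lines_from_patch(patch: str, file_name, line_start, line_end, side) -> tuple[str, str]:
    try:
        patch_with_lines_str = f"\n\n## File: '{file_name.strip()}'\n\n"
        selected_lines = ""
        patch_lines = patch.splitlines()
        # ... the unchanged hunk-header parsing and line selection run here ...
        for line in patch_lines:
            patch_with_lines_str += line + '\n'
    except Exception as e:
        # new in this commit: any failure is logged and reported as "nothing extracted"
        logger.error(f"Failed to extract hunk lines from patch: {e}\n{traceback.format_exc()}")
        return "", ""
    return patch_with_lines_str.rstrip(), selected_lines.rstrip()

# callers can now rely on always getting two strings back, even for bad input
print(extract_hunk_lines_from_patch(None, "foo.py", 1, 2, side='right'))  # ('', '')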
@@ -23,6 +23,7 @@ from pydantic import BaseModel
 from starlette_context import context

 from pr_agent.algo import MAX_TOKENS
+from pr_agent.algo.git_patch_processing import extract_hunk_lines_from_patch
 from pr_agent.algo.token_handler import TokenEncoder
 from pr_agent.algo.types import FilePatchInfo
 from pr_agent.config_loader import get_settings, global_settings
@@ -272,7 +273,11 @@ def convert_to_markdown_v2(output_data: dict,

     return markdown_text

-def extract_relevant_lines_str(end_line, files, relevant_file, start_line, dedent=False):
+def extract_relevant_lines_str(end_line, files, relevant_file, start_line, dedent=False) -> str:
+    """
+    Finds 'relevant_file' in 'files', and extracts the lines from 'start_line' to 'end_line' string from the file content.
+    """
     try:
         relevant_lines_str = ""
         if files:
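The new docstring leaves the line-range convention implicit: as the slice in the next hunk (relevant_file_lines[start_line - 1:end_line]) shows, start_line and end_line are treated as 1-based and inclusive. A tiny self-contained illustration of that convention (the file content here is made up for the example):

head_file = "line one\nline two\nline three\nline four\n"
start_line, end_line = 2, 3  # 1-based, inclusive

# same slicing as extract_relevant_lines_str applies to file.head_file
relevant_file_lines = head_file.splitlines()
print("\n".join(relevant_file_lines[start_line - 1:end_line]))
# line two
# line three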
@@ -280,10 +285,23 @@ def extract_relevant_lines_str(end_line, files, relevant_file, start_line, deden
             for file in files:
                 if file.filename.strip() == relevant_file:
                     if not file.head_file:
-                        get_logger().warning(f"No content found in file: {file.filename}")
+                        # as a fallback, extract relevant lines directly from patch
+                        patch = file.patch
+                        get_logger().info(f"No content found in file: '{file.filename}' for 'extract_relevant_lines_str'. Using patch instead")
+                        _, selected_lines = extract_hunk_lines_from_patch(patch, file.filename, start_line, end_line,side='right')
+                        if not selected_lines:
+                            get_logger().error(f"Failed to extract relevant lines from patch: {file.filename}")
                             return ""
+                        # filter out '-' lines
+                        relevant_lines_str = ""
+                        for line in selected_lines.splitlines():
+                            if line.startswith('-'):
+                                continue
+                            relevant_lines_str += line[1:] + '\n'
+                    else:
                         relevant_file_lines = file.head_file.splitlines()
                         relevant_lines_str = "\n".join(relevant_file_lines[start_line - 1:end_line])

         if dedent and relevant_lines_str:
             # Remove the longest leading string of spaces and tabs common to all lines.
             relevant_lines_str = textwrap.dedent(relevant_lines_str)
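The fallback relies on extract_hunk_lines_from_patch returning the selected hunk lines with their one-character diff prefix ('+', '-', or ' ') still attached, which is why deleted lines are skipped and line[1:] strips the prefix from the rest. A self-contained sketch of that filtering step (the selected_lines value is made up for the example):

# hypothetical output of extract_hunk_lines_from_patch(..., side='right')
selected_lines = (
    "+def handler(event):\n"
    "-    return None\n"
    "+    value = event['key']\n"
    "+    return value"
)

# filter out '-' lines and strip the diff prefix, as in the new fallback branch
relevant_lines_str = ""
for line in selected_lines.splitlines():
    if line.startswith('-'):
        continue
    relevant_lines_str += line[1:] + '\n'

print(relevant_lines_str)
# def handler(event):
#     value = event['key']
#     return value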