def extract_hunk_lines_from_patch(patch: str, file_name, line_start, line_end, side) -> tuple[str, str]:
    """
    Extract the hunk(s) of 'patch' that contain 'line_start', plus the lines inside
    the requested [line_start, line_end] range.

    Args:
        patch: unified-diff text for a single file.
        file_name: file name; only used (stripped) in the "## File: '...'" title that
            prefixes the first returned string.
        line_start: first requested line number (inclusive).
        line_end: last requested line number (inclusive).
        side: 'left' to test 'line_start' against the old-file range of each hunk header,
            'right' to test it against the new-file range (case-insensitive).
            Any other value keeps every hunk and selects no lines.

    Returns:
        A tuple (patch_with_lines_str, selected_lines), both right-stripped:
        the title followed by the kept hunks, and the lines that fall in the requested range.
        If anything raises, the error is logged and ("", "") is returned.
    """
    try:
        side_key = side.lower()
        hunk_header_re = re.compile(
            r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
        kept_parts = [f"\n\n## File: '{file_name.strip()}'\n\n"]
        selected_parts = []
        # Sentinel values: lines that appear before the first hunk header are still
        # processed, and are positioned relative to a start of -1.
        start1, size1, start2, size2 = -1, -1, -1, -1
        skip_hunk = False
        selected_lines_num = 0
        for line in patch.splitlines():
            if 'no newline at end of file' in line.lower():
                continue

            if line.startswith('@@'):
                skip_hunk = False
                selected_lines_num = 0

                match = hunk_header_re.match(line)
                if match is None:
                    # Malformed header: drop only this hunk instead of failing the whole extraction.
                    skip_hunk = True
                    continue

                # NOTE(review): the exact values returned for omitted sizes depend on
                # 'extract_hunk_headers', which is defined outside this block.
                _, size1, size2, start1, start2 = extract_hunk_headers(match)

                # check if line range is in this hunk
                if side_key == 'left':
                    in_hunk = start1 <= line_start <= start1 + size1
                elif side_key == 'right':
                    in_hunk = start2 <= line_start <= start2 + size2
                else:
                    in_hunk = True
                if not in_hunk:
                    skip_hunk = True
                    continue
                kept_parts.append(f'\n{line}\n')

            elif not skip_hunk:
                if side_key == 'right':
                    is_selected = line_start <= start2 + selected_lines_num <= line_end
                elif side_key == 'left':
                    # The lower bound must be 'line_start'; comparing against 'start1'
                    # is always true and would select everything from the hunk start.
                    is_selected = line_start <= start1 + selected_lines_num <= line_end
                else:
                    is_selected = False
                if is_selected:
                    selected_parts.append(line + '\n')
                kept_parts.append(line + '\n')
                if not line.startswith('-'):  # currently we don't support /ask line for deleted lines
                    selected_lines_num += 1
    except Exception as e:
        get_logger().error(f"Failed to extract hunk lines from patch: {e}", artifact={"traceback": traceback.format_exc()})
        return "", ""

    return "".join(kept_parts).rstrip(), "".join(selected_parts).rstrip()
extract_relevant_lines_str(end_line, files, relevant_file, start_line, dedent=False) -> str: + """ + Finds 'relevant_file' in 'files', and extracts the lines from 'start_line' to 'end_line' string from the file content. + """ try: relevant_lines_str = "" if files: @@ -280,10 +285,23 @@ def extract_relevant_lines_str(end_line, files, relevant_file, start_line, deden for file in files: if file.filename.strip() == relevant_file: if not file.head_file: - get_logger().warning(f"No content found in file: {file.filename}") - return "" - relevant_file_lines = file.head_file.splitlines() - relevant_lines_str = "\n".join(relevant_file_lines[start_line - 1:end_line]) + # as a fallback, extract relevant lines directly from patch + patch = file.patch + get_logger().info(f"No content found in file: '{file.filename}' for 'extract_relevant_lines_str'. Using patch instead") + _, selected_lines = extract_hunk_lines_from_patch(patch, file.filename, start_line, end_line,side='right') + if not selected_lines: + get_logger().error(f"Failed to extract relevant lines from patch: {file.filename}") + return "" + # filter out '-' lines + relevant_lines_str = "" + for line in selected_lines.splitlines(): + if line.startswith('-'): + continue + relevant_lines_str += line[1:] + '\n' + else: + relevant_file_lines = file.head_file.splitlines() + relevant_lines_str = "\n".join(relevant_file_lines[start_line - 1:end_line]) + if dedent and relevant_lines_str: # Remove the longest leading string of spaces and tabs common to all lines. relevant_lines_str = textwrap.dedent(relevant_lines_str)