mirror of
https://github.com/qodo-ai/pr-agent.git
synced 2025-07-02 11:50:37 +08:00
Merge pull request #1420 from Codium-ai/tr/review_fix
fix: improve line extraction from files with missing content
This commit is contained in:
@ -364,48 +364,51 @@ __old hunk__
|
||||
|
||||
|
||||
def extract_hunk_lines_from_patch(patch: str, file_name, line_start, line_end, side) -> tuple[str, str]:
    """
    Collect the hunk(s) of a unified-diff patch whose range contains 'line_start',
    and the individual patch lines that fall inside [line_start, line_end].

    Args:
        patch: unified-diff text; it is split into lines and scanned once.
        file_name: name placed (stripped) in the "## File: '...'" header of the first result.
        line_start: first line number of the requested range.
        line_end: last line number of the requested range (inclusive).
        side: 'left' to match against the old-file hunk numbers (start1/size1),
            'right' to match against the new-file hunk numbers (start2/size2).
            Compared case-insensitively; must be a string.

    Returns:
        (patch_with_lines_str, selected_lines), both right-stripped:
          - patch_with_lines_str: the file header followed by every hunk that was not skipped.
          - selected_lines: the patch lines (diff prefix included) whose computed line number
            lies within the requested range.
        If anything raises while processing, the error is logged and ("", "") is returned.
    """
    try:
        patch_parts = [f"\n\n## File: '{file_name.strip()}'\n\n"]
        selected_parts = []
        patch_lines = patch.splitlines()
        RE_HUNK_HEADER = re.compile(
            r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
        start1, size1, start2, size2 = -1, -1, -1, -1
        skip_hunk = False
        selected_lines_num = 0
        # 'side' does not change while scanning, so normalise it once instead of per line
        side_lower = side.lower()
        for line in patch_lines:
            if 'no newline at end of file' in line.lower():
                continue

            if line.startswith('@@'):
                # a new hunk starts: reset the per-hunk state
                skip_hunk = False
                selected_lines_num = 0
                header_line = line

                match = RE_HUNK_HEADER.match(line)

                # extract_hunk_headers is defined elsewhere; a failure on a malformed
                # header (match is None) is handled by the surrounding 'except'
                _, size1, size2, start1, start2 = extract_hunk_headers(match)

                # check if line range is in this hunk
                if side_lower == 'left':
                    if not (start1 <= line_start <= start1 + size1):
                        skip_hunk = True
                        continue
                elif side_lower == 'right':
                    if not (start2 <= line_start <= start2 + size2):
                        skip_hunk = True
                        continue
                patch_parts.append(f'\n{header_line}\n')

            elif not skip_hunk:
                if side_lower == 'right':
                    in_range = line_start <= start2 + selected_lines_num <= line_end
                elif side_lower == 'left':
                    # the lower bound must be 'line_start' (as on the right side); comparing
                    # against 'start1' was always true and selected lines before the range
                    in_range = line_start <= start1 + selected_lines_num <= line_end
                else:
                    in_range = False
                if in_range:
                    selected_parts.append(line + '\n')
                patch_parts.append(line + '\n')
                if not line.startswith('-'):  # currently we don't support /ask line for deleted lines
                    selected_lines_num += 1
    except Exception as e:
        get_logger().error(f"Failed to extract hunk lines from patch: {e}", artifact={"traceback": traceback.format_exc()})
        return "", ""

    return "".join(patch_parts).rstrip(), "".join(selected_parts).rstrip()
|
||||
|
@ -23,6 +23,7 @@ from pydantic import BaseModel
|
||||
from starlette_context import context
|
||||
|
||||
from pr_agent.algo import MAX_TOKENS
|
||||
from pr_agent.algo.git_patch_processing import extract_hunk_lines_from_patch
|
||||
from pr_agent.algo.token_handler import TokenEncoder
|
||||
from pr_agent.algo.types import FilePatchInfo
|
||||
from pr_agent.config_loader import get_settings, global_settings
|
||||
@ -272,7 +273,11 @@ def convert_to_markdown_v2(output_data: dict,
|
||||
|
||||
return markdown_text
|
||||
|
||||
def extract_relevant_lines_str(end_line, files, relevant_file, start_line, dedent=False):
|
||||
|
||||
def extract_relevant_lines_str(end_line, files, relevant_file, start_line, dedent=False) -> str:
|
||||
"""
|
||||
Finds 'relevant_file' in 'files', and extracts the lines from 'start_line' to 'end_line' string from the file content.
|
||||
"""
|
||||
try:
|
||||
relevant_lines_str = ""
|
||||
if files:
|
||||
@ -280,10 +285,23 @@ def extract_relevant_lines_str(end_line, files, relevant_file, start_line, deden
|
||||
for file in files:
|
||||
if file.filename.strip() == relevant_file:
|
||||
if not file.head_file:
|
||||
get_logger().warning(f"No content found in file: {file.filename}")
|
||||
return ""
|
||||
relevant_file_lines = file.head_file.splitlines()
|
||||
relevant_lines_str = "\n".join(relevant_file_lines[start_line - 1:end_line])
|
||||
# as a fallback, extract relevant lines directly from patch
|
||||
patch = file.patch
|
||||
get_logger().info(f"No content found in file: '{file.filename}' for 'extract_relevant_lines_str'. Using patch instead")
|
||||
_, selected_lines = extract_hunk_lines_from_patch(patch, file.filename, start_line, end_line,side='right')
|
||||
if not selected_lines:
|
||||
get_logger().error(f"Failed to extract relevant lines from patch: {file.filename}")
|
||||
return ""
|
||||
# filter out '-' lines
|
||||
relevant_lines_str = ""
|
||||
for line in selected_lines.splitlines():
|
||||
if line.startswith('-'):
|
||||
continue
|
||||
relevant_lines_str += line[1:] + '\n'
|
||||
else:
|
||||
relevant_file_lines = file.head_file.splitlines()
|
||||
relevant_lines_str = "\n".join(relevant_file_lines[start_line - 1:end_line])
|
||||
|
||||
if dedent and relevant_lines_str:
|
||||
# Remove the longest leading string of spaces and tabs common to all lines.
|
||||
relevant_lines_str = textwrap.dedent(relevant_lines_str)
|
||||
|
Reference in New Issue
Block a user