Merge pull request #1420 from Codium-ai/tr/review_fix

fix: improve line extraction from files with missing content
Author: Tal
Date: 2024-12-27 09:02:41 +02:00
Committed by: GitHub

2 changed files with 63 additions and 42 deletions

File: pr_agent/algo/git_patch_processing.py

@@ -364,48 +364,51 @@ __old hunk__
 def extract_hunk_lines_from_patch(patch: str, file_name, line_start, line_end, side) -> tuple[str, str]:
-    patch_with_lines_str = f"\n\n## File: '{file_name.strip()}'\n\n"
-    selected_lines = ""
-    patch_lines = patch.splitlines()
-    RE_HUNK_HEADER = re.compile(
-        r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
-    match = None
-    start1, size1, start2, size2 = -1, -1, -1, -1
-    skip_hunk = False
-    selected_lines_num = 0
-    for line in patch_lines:
-        if 'no newline at end of file' in line.lower():
-            continue
-
-        if line.startswith('@@'):
-            skip_hunk = False
-            selected_lines_num = 0
-            header_line = line
-
-            match = RE_HUNK_HEADER.match(line)
-
-            section_header, size1, size2, start1, start2 = extract_hunk_headers(match)
-
-            # check if line range is in this hunk
-            if side.lower() == 'left':
-                # check if line range is in this hunk
-                if not (start1 <= line_start <= start1 + size1):
-                    skip_hunk = True
-                    continue
-            elif side.lower() == 'right':
-                if not (start2 <= line_start <= start2 + size2):
-                    skip_hunk = True
-                    continue
-            patch_with_lines_str += f'\n{header_line}\n'
-
-        elif not skip_hunk:
-            if side.lower() == 'right' and line_start <= start2 + selected_lines_num <= line_end:
-                selected_lines += line + '\n'
-            if side.lower() == 'left' and start1 <= selected_lines_num + start1 <= line_end:
-                selected_lines += line + '\n'
-            patch_with_lines_str += line + '\n'
-            if not line.startswith('-'): # currently we don't support /ask line for deleted lines
-                selected_lines_num += 1
+    try:
+        patch_with_lines_str = f"\n\n## File: '{file_name.strip()}'\n\n"
+        selected_lines = ""
+        patch_lines = patch.splitlines()
+        RE_HUNK_HEADER = re.compile(
+            r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
+        match = None
+        start1, size1, start2, size2 = -1, -1, -1, -1
+        skip_hunk = False
+        selected_lines_num = 0
+        for line in patch_lines:
+            if 'no newline at end of file' in line.lower():
+                continue
+
+            if line.startswith('@@'):
+                skip_hunk = False
+                selected_lines_num = 0
+                header_line = line
+
+                match = RE_HUNK_HEADER.match(line)
+
+                section_header, size1, size2, start1, start2 = extract_hunk_headers(match)
+
+                # check if line range is in this hunk
+                if side.lower() == 'left':
+                    # check if line range is in this hunk
+                    if not (start1 <= line_start <= start1 + size1):
+                        skip_hunk = True
+                        continue
+                elif side.lower() == 'right':
+                    if not (start2 <= line_start <= start2 + size2):
+                        skip_hunk = True
+                        continue
+                patch_with_lines_str += f'\n{header_line}\n'
+
+            elif not skip_hunk:
+                if side.lower() == 'right' and line_start <= start2 + selected_lines_num <= line_end:
+                    selected_lines += line + '\n'
+                if side.lower() == 'left' and start1 <= selected_lines_num + start1 <= line_end:
+                    selected_lines += line + '\n'
+                patch_with_lines_str += line + '\n'
+                if not line.startswith('-'): # currently we don't support /ask line for deleted lines
+                    selected_lines_num += 1
+    except Exception as e:
+        get_logger().error(f"Failed to extract hunk lines from patch: {e}", artifact={"traceback": traceback.format_exc()})
+        return "", ""

     return patch_with_lines_str.rstrip(), selected_lines.rstrip()
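Aside (not part of the commit): the hunk-header regex in the function above can be exercised standalone. This sketch only assumes Python's standard re module; it uses a plain match.groups() rather than the repository's extract_hunk_headers helper, which this diff does not show, and the sample header string is taken from this very diff.

import re

# Same pattern as RE_HUNK_HEADER in the function above.
RE_HUNK_HEADER = re.compile(r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")

# The captured groups are (start1, size1, start2, size2, section_header):
# old-file start/size, new-file start/size, and the trailing context text.
match = RE_HUNK_HEADER.match("@@ -364,48 +364,51 @@ __old hunk__")
start1, size1, start2, size2, section_header = match.groups()
print(start1, size1, start2, size2)  # 364 48 364 51
print(section_header)                # __old hunk__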

File: pr_agent/algo/utils.py

@@ -23,6 +23,7 @@ from pydantic import BaseModel
 from starlette_context import context

 from pr_agent.algo import MAX_TOKENS
+from pr_agent.algo.git_patch_processing import extract_hunk_lines_from_patch
 from pr_agent.algo.token_handler import TokenEncoder
 from pr_agent.algo.types import FilePatchInfo
 from pr_agent.config_loader import get_settings, global_settings
@@ -272,7 +273,11 @@ def convert_to_markdown_v2(output_data: dict,
     return markdown_text


-def extract_relevant_lines_str(end_line, files, relevant_file, start_line, dedent=False):
+def extract_relevant_lines_str(end_line, files, relevant_file, start_line, dedent=False) -> str:
+    """
+    Finds 'relevant_file' in 'files', and extracts the lines from 'start_line' to 'end_line' string from the file content.
+    """
     try:
         relevant_lines_str = ""
         if files:
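For reference, the 1-based, inclusive line range described by the new docstring maps onto a Python slice as shown below. This standalone sketch uses made-up data and is not code from the commit:

head_file = "line 1\nline 2\nline 3\nline 4\nline 5"
start_line, end_line = 2, 4  # 1-based and inclusive, per the docstring

# Mirrors the happy path further down: the slice is [start_line - 1:end_line].
relevant_file_lines = head_file.splitlines()
print("\n".join(relevant_file_lines[start_line - 1:end_line]))
# line 2
# line 3
# line 4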
@@ -280,10 +285,23 @@ def extract_relevant_lines_str(end_line, files, relevant_file, start_line, deden
             for file in files:
                 if file.filename.strip() == relevant_file:
                     if not file.head_file:
-                        get_logger().warning(f"No content found in file: {file.filename}")
-                        return ""
-                    relevant_file_lines = file.head_file.splitlines()
-                    relevant_lines_str = "\n".join(relevant_file_lines[start_line - 1:end_line])
+                        # as a fallback, extract relevant lines directly from patch
+                        patch = file.patch
+                        get_logger().info(f"No content found in file: '{file.filename}' for 'extract_relevant_lines_str'. Using patch instead")
+                        _, selected_lines = extract_hunk_lines_from_patch(patch, file.filename, start_line, end_line, side='right')
+                        if not selected_lines:
+                            get_logger().error(f"Failed to extract relevant lines from patch: {file.filename}")
+                            return ""
+                        # filter out '-' lines
+                        relevant_lines_str = ""
+                        for line in selected_lines.splitlines():
+                            if line.startswith('-'):
+                                continue
+                            relevant_lines_str += line[1:] + '\n'
+                    else:
+                        relevant_file_lines = file.head_file.splitlines()
+                        relevant_lines_str = "\n".join(relevant_file_lines[start_line - 1:end_line])
                     if dedent and relevant_lines_str:
                         # Remove the longest leading string of spaces and tabs common to all lines.
                         relevant_lines_str = textwrap.dedent(relevant_lines_str)
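A minimal sketch of the new fallback filtering, run on an invented selected_lines value of the shape extract_hunk_lines_from_patch returns (lines prefixed with their diff markers). Only the marker handling mirrors the code above; the sample text is not from the commit:

# Invented sample of what extract_hunk_lines_from_patch might return for side='right'.
selected_lines = " context line\n+added line one\n-deleted line\n+added line two"

relevant_lines_str = ""
for line in selected_lines.splitlines():
    if line.startswith('-'):               # deleted lines are not part of the new file
        continue
    relevant_lines_str += line[1:] + '\n'  # strip the leading '+' or ' ' diff marker

print(relevant_lines_str, end='')
# context line
# added line one
# added line two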