TestFindLineNumberOfRelevantLineInFile

This commit is contained in:
mrT23
2023-08-06 08:31:15 +03:00
parent fed0ea349a
commit 7367c62cf9
2 changed files with 93 additions and 11 deletions

View File

@ -3,7 +3,7 @@ from __future__ import annotations
import re import re
import difflib import difflib
import logging import logging
from typing import Callable, Tuple, List, Any, Sequence from typing import Callable, Tuple, List, Any
from github import RateLimitExceededException from github import RateLimitExceededException
from pr_agent.algo import MAX_TOKENS from pr_agent.algo import MAX_TOKENS
@ -220,12 +220,25 @@ async def retry_with_fallback_models(f: Callable):
raise # Re-raise the last exception raise # Re-raise the last exception
def find_line_number_of_relevant_line_in_file(diff_files: list[FilePatchInfo], relevant_file: str, def find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo],
relevant_file: str,
relevant_line_in_file: str) -> Tuple[int, int]: relevant_line_in_file: str) -> Tuple[int, int]:
"""
Find the line number and absolute position of a relevant line in a file.
Args:
diff_files (List[FilePatchInfo]): A list of FilePatchInfo objects representing the patches of files.
relevant_file (str): The name of the file where the relevant line is located.
relevant_line_in_file (str): The content of the relevant line.
Returns:
Tuple[int, int]: A tuple containing the line number and absolute position of the relevant line in the file.
"""
position = -1 position = -1
absolute_position = -1 absolute_position = -1
RE_HUNK_HEADER = re.compile( re_hunk_header = re.compile(
r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)") r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
for file in diff_files: for file in diff_files:
if file.filename.strip() == relevant_file: if file.filename.strip() == relevant_file:
patch = file.patch patch = file.patch
@ -233,16 +246,16 @@ def find_line_number_of_relevant_line_in_file(diff_files: list[FilePatchInfo], r
# try to find the line in the patch using difflib, with some margin of error # try to find the line in the patch using difflib, with some margin of error
matches_difflib: list[str | Any] = difflib.get_close_matches(relevant_line_in_file, matches_difflib: list[str | Any] = difflib.get_close_matches(relevant_line_in_file,
file.patch.splitlines(), n=3, cutoff=0.95) patch_lines, n=3, cutoff=0.93)
if len(matches_difflib) == 1 and matches_difflib[0].startswith('+'): if len(matches_difflib) == 1 and matches_difflib[0].startswith('+'):
relevant_line_in_file = matches_difflib[0] relevant_line_in_file = matches_difflib[0]
delta = 0 delta = 0
start1, size1, start2, size2 = 0, 0, 0, 0
for i, line in enumerate(patch_lines): for i, line in enumerate(patch_lines):
if line.startswith('@@'): if line.startswith('@@'):
delta = 0 delta = 0
match = RE_HUNK_HEADER.match(line) match = re_hunk_header.match(line)
start1, size1, start2, size2 = map(int, match.groups()[:4]) start1, size1, start2, size2 = map(int, match.groups()[:4])
elif not line.startswith('-'): elif not line.startswith('-'):
delta += 1 delta += 1
@ -251,18 +264,19 @@ def find_line_number_of_relevant_line_in_file(diff_files: list[FilePatchInfo], r
position = i position = i
absolute_position = start2 + delta - 1 absolute_position = start2 + delta - 1
break break
if position == -1:
if position == -1 and relevant_line_in_file[0] == '+':
no_plus_line = relevant_line_in_file[1:].lstrip()
for i, line in enumerate(patch_lines): for i, line in enumerate(patch_lines):
if line.startswith('@@'): if line.startswith('@@'):
delta = 0 delta = 0
match = RE_HUNK_HEADER.match(line) match = re_hunk_header.match(line)
start1, size1, start2, size2 = map(int, match.groups()[:4]) start1, size1, start2, size2 = map(int, match.groups()[:4])
elif not line.startswith('-'): elif not line.startswith('-'):
delta += 1 delta += 1
if relevant_line_in_file[0] == '+' and relevant_line_in_file[1:].lstrip() in line and line[ if no_plus_line in line and line[0] != '-':
0] != '-': # The model might add a '+' to the beginning of the relevant_line_in_file even if originally
# The model often adds a '+' to the beginning of the relevant_line_in_file even if originally
# it's a context line # it's a context line
position = i position = i
absolute_position = start2 + delta - 1 absolute_position = start2 + delta - 1

View File

@ -0,0 +1,68 @@
# Generated by CodiumAI
from pr_agent.git_providers.git_provider import FilePatchInfo
from pr_agent.algo.pr_processing import find_line_number_of_relevant_line_in_file
import pytest
class TestFindLineNumberOfRelevantLineInFile:
# Tests that the function returns the correct line number and absolute position when the relevant line is found in the patch
def test_relevant_line_found_in_patch(self):
diff_files = [
FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\n-line1\n+line2\n+relevant_line\n', filename='file1')
]
relevant_file = 'file1'
relevant_line_in_file = 'relevant_line'
expected = (3, 2) # (position in patch, absolute_position in new file)
assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected
# Tests that the function returns the correct line number and absolute position when a similar line is found using difflib
def test_similar_line_found_using_difflib(self):
diff_files = [
FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\n-line1\n+relevant_line in file similar match\n', filename='file1')
]
relevant_file = 'file1'
relevant_line_in_file = '+relevant_line in file similar match ' # note the space at the end. This is to simulate a similar line found using difflib
expected = (2, 1)
assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected
# Tests that the function returns (-1, -1) when the relevant line is not found in the patch and no similar line is found using difflib
def test_relevant_line_not_found(self):
diff_files = [
FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\n-line1\n+relevant_line\n', filename='file1')
]
relevant_file = 'file1'
relevant_line_in_file = 'not_found'
expected = (-1, -1)
assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected
# Tests that the function returns (-1, -1) when the relevant file is not found in any of the patches
def test_relevant_file_not_found(self):
diff_files = [
FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\n-line1\n+relevant_line\n', filename='file2')
]
relevant_file = 'file1'
relevant_line_in_file = 'relevant_line'
expected = (-1, -1)
assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected
# Tests that the function returns (-1, -1) when the relevant_line_in_file is an empty string
def test_empty_relevant_line(self):
diff_files = [
FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\n-line1\n+relevant_line\n', filename='file1')
]
relevant_file = 'file1'
relevant_line_in_file = ''
expected = (0, 0)
assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected
# Tests that the function returns (-1, -1) when the relevant_line_in_file is found in the patch but it is a deleted line
def test_relevant_line_found_but_deleted(self):
diff_files = [
FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,2 +1,1 @@\n-line1\n-relevant_line\n', filename='file1')
]
relevant_file = 'file1'
relevant_line_in_file = 'relevant_line'
expected = (-1, -1)
assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected