From 7367c62cf917edf006986a9ccd8f6c8855cdaa2d Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 6 Aug 2023 08:31:15 +0300 Subject: [PATCH] TestFindLineNumberOfRelevantLineInFile --- pr_agent/algo/pr_processing.py | 36 +++++++--- ...nd_line_number_of_relevant_line_in_file.py | 68 +++++++++++++++++++ 2 files changed, 93 insertions(+), 11 deletions(-) create mode 100644 tests/unittest/test_find_line_number_of_relevant_line_in_file.py diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index d48b4bde..fb66583d 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -3,7 +3,7 @@ from __future__ import annotations import re import difflib import logging -from typing import Callable, Tuple, List, Any, Sequence +from typing import Callable, Tuple, List, Any from github import RateLimitExceededException from pr_agent.algo import MAX_TOKENS @@ -220,12 +220,25 @@ async def retry_with_fallback_models(f: Callable): raise # Re-raise the last exception -def find_line_number_of_relevant_line_in_file(diff_files: list[FilePatchInfo], relevant_file: str, +def find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo], + relevant_file: str, relevant_line_in_file: str) -> Tuple[int, int]: + """ + Find the line number and absolute position of a relevant line in a file. + + Args: + diff_files (List[FilePatchInfo]): A list of FilePatchInfo objects representing the patches of files. + relevant_file (str): The name of the file where the relevant line is located. + relevant_line_in_file (str): The content of the relevant line. + + Returns: + Tuple[int, int]: A tuple containing the line number and absolute position of the relevant line in the file. + """ position = -1 absolute_position = -1 - RE_HUNK_HEADER = re.compile( + re_hunk_header = re.compile( r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)") + for file in diff_files: if file.filename.strip() == relevant_file: patch = file.patch @@ -233,16 +246,16 @@ def find_line_number_of_relevant_line_in_file(diff_files: list[FilePatchInfo], r # try to find the line in the patch using difflib, with some margin of error matches_difflib: list[str | Any] = difflib.get_close_matches(relevant_line_in_file, - file.patch.splitlines(), n=3, cutoff=0.95) + patch_lines, n=3, cutoff=0.93) if len(matches_difflib) == 1 and matches_difflib[0].startswith('+'): relevant_line_in_file = matches_difflib[0] delta = 0 + start1, size1, start2, size2 = 0, 0, 0, 0 for i, line in enumerate(patch_lines): - if line.startswith('@@'): delta = 0 - match = RE_HUNK_HEADER.match(line) + match = re_hunk_header.match(line) start1, size1, start2, size2 = map(int, match.groups()[:4]) elif not line.startswith('-'): delta += 1 @@ -251,18 +264,19 @@ def find_line_number_of_relevant_line_in_file(diff_files: list[FilePatchInfo], r position = i absolute_position = start2 + delta - 1 break - if position == -1: + + if position == -1 and relevant_line_in_file[0] == '+': + no_plus_line = relevant_line_in_file[1:].lstrip() for i, line in enumerate(patch_lines): if line.startswith('@@'): delta = 0 - match = RE_HUNK_HEADER.match(line) + match = re_hunk_header.match(line) start1, size1, start2, size2 = map(int, match.groups()[:4]) elif not line.startswith('-'): delta += 1 - if relevant_line_in_file[0] == '+' and relevant_line_in_file[1:].lstrip() in line and line[ - 0] != '-': - # The model often adds a '+' to the beginning of the relevant_line_in_file even if originally + if no_plus_line in line and line[0] != '-': + # The model might add a '+' to the beginning of the relevant_line_in_file even if originally # it's a context line position = i absolute_position = start2 + delta - 1 diff --git a/tests/unittest/test_find_line_number_of_relevant_line_in_file.py b/tests/unittest/test_find_line_number_of_relevant_line_in_file.py new file mode 100644 index 00000000..7488c6df --- /dev/null +++ b/tests/unittest/test_find_line_number_of_relevant_line_in_file.py @@ -0,0 +1,68 @@ + +# Generated by CodiumAI +from pr_agent.git_providers.git_provider import FilePatchInfo +from pr_agent.algo.pr_processing import find_line_number_of_relevant_line_in_file + + +import pytest + +class TestFindLineNumberOfRelevantLineInFile: + # Tests that the function returns the correct line number and absolute position when the relevant line is found in the patch + def test_relevant_line_found_in_patch(self): + diff_files = [ + FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\n-line1\n+line2\n+relevant_line\n', filename='file1') + ] + relevant_file = 'file1' + relevant_line_in_file = 'relevant_line' + expected = (3, 2) # (position in patch, absolute_position in new file) + assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected + + # Tests that the function returns the correct line number and absolute position when a similar line is found using difflib + def test_similar_line_found_using_difflib(self): + diff_files = [ + FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\n-line1\n+relevant_line in file similar match\n', filename='file1') + ] + relevant_file = 'file1' + relevant_line_in_file = '+relevant_line in file similar match ' # note the space at the end. This is to simulate a similar line found using difflib + expected = (2, 1) + assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected + + # Tests that the function returns (-1, -1) when the relevant line is not found in the patch and no similar line is found using difflib + def test_relevant_line_not_found(self): + diff_files = [ + FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\n-line1\n+relevant_line\n', filename='file1') + ] + relevant_file = 'file1' + relevant_line_in_file = 'not_found' + expected = (-1, -1) + assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected + + # Tests that the function returns (-1, -1) when the relevant file is not found in any of the patches + def test_relevant_file_not_found(self): + diff_files = [ + FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\n-line1\n+relevant_line\n', filename='file2') + ] + relevant_file = 'file1' + relevant_line_in_file = 'relevant_line' + expected = (-1, -1) + assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected + + # Tests that the function returns (-1, -1) when the relevant_line_in_file is an empty string + def test_empty_relevant_line(self): + diff_files = [ + FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,1 +1,2 @@\n-line1\n+relevant_line\n', filename='file1') + ] + relevant_file = 'file1' + relevant_line_in_file = '' + expected = (0, 0) + assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected + + # Tests that the function returns (-1, -1) when the relevant_line_in_file is found in the patch but it is a deleted line + def test_relevant_line_found_but_deleted(self): + diff_files = [ + FilePatchInfo(base_file='file1', head_file='file1', patch='@@ -1,2 +1,1 @@\n-line1\n-relevant_line\n', filename='file1') + ] + relevant_file = 'file1' + relevant_line_in_file = 'relevant_line' + expected = (-1, -1) + assert find_line_number_of_relevant_line_in_file(diff_files, relevant_file, relevant_line_in_file) == expected \ No newline at end of file