Code adjustment to support calling as library

2025-07-06 05:40:38 +08:00 · 2023-08-30 10:29:51 +03:00
parent 56828f0170
commit d51e7ee5ad
18 changed files with 155 additions and 170 deletions
--- a/pr_agent/algo/pr_processing.py
+++ b/pr_agent/algo/pr_processing.py
@@ -1,19 +1,17 @@
 from __future__ import annotations

-import difflib
 import logging
-import re
 import traceback
-from typing import Any, Callable, List, Tuple
+from typing import Callable, List, Tuple

 from github import RateLimitExceededException

 from pr_agent.algo import MAX_TOKENS
 from pr_agent.algo.git_patch_processing import convert_to_hunks_with_lines_numbers, extend_patch, handle_patch_deletions
 from pr_agent.algo.language_handler import sort_files_by_main_languages
-from pr_agent.algo.token_handler import TokenHandler, get_token_encoder
+from pr_agent.algo.token_handler import TokenHandler
 from pr_agent.config_loader import get_settings
-from pr_agent.git_providers.git_provider import FilePatchInfo, GitProvider
+from pr_agent.git_providers.git_provider import GitProvider

 DELETED_FILES_ = "Deleted files:\n"

@@ -247,99 +245,6 @@ def _get_all_deployments(all_models: List[str]) -> List[str]:
    return all_deployments


-def find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo],
-                                              relevant_file: str,
-                                              relevant_line_in_file: str) -> Tuple[int, int]:
-    """
-    Find the line number and absolute position of a relevant line in a file.
-
-    Args:
-        diff_files (List[FilePatchInfo]): A list of FilePatchInfo objects representing the patches of files.
-        relevant_file (str): The name of the file where the relevant line is located.
-        relevant_line_in_file (str): The content of the relevant line.
-
-    Returns:
-        Tuple[int, int]: A tuple containing the line number and absolute position of the relevant line in the file.
-    """
-    position = -1
-    absolute_position = -1
-    re_hunk_header = re.compile(
-        r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
-
-    for file in diff_files:
-        if file.filename.strip() == relevant_file:
-            patch = file.patch
-            patch_lines = patch.splitlines()
-
-            # try to find the line in the patch using difflib, with some margin of error
-            matches_difflib: list[str | Any] = difflib.get_close_matches(relevant_line_in_file,
-                                                                         patch_lines, n=3, cutoff=0.93)
-            if len(matches_difflib) == 1 and matches_difflib[0].startswith('+'):
-                relevant_line_in_file = matches_difflib[0]
-
-            delta = 0
-            start1, size1, start2, size2 = 0, 0, 0, 0
-            for i, line in enumerate(patch_lines):
-                if line.startswith('@@'):
-                    delta = 0
-                    match = re_hunk_header.match(line)
-                    start1, size1, start2, size2 = map(int, match.groups()[:4])
-                elif not line.startswith('-'):
-                    delta += 1
-
-                if relevant_line_in_file in line and line[0] != '-':
-                    position = i
-                    absolute_position = start2 + delta - 1
-                    break
-
-            if position == -1 and relevant_line_in_file[0] == '+':
-                no_plus_line = relevant_line_in_file[1:].lstrip()
-                for i, line in enumerate(patch_lines):
-                    if line.startswith('@@'):
-                        delta = 0
-                        match = re_hunk_header.match(line)
-                        start1, size1, start2, size2 = map(int, match.groups()[:4])
-                    elif not line.startswith('-'):
-                        delta += 1
-
-                    if no_plus_line in line and line[0] != '-':
-                        # The model might add a '+' to the beginning of the relevant_line_in_file even if originally
-                        # it's a context line
-                        position = i
-                        absolute_position = start2 + delta - 1
-                        break
-    return position, absolute_position
-
-
-def clip_tokens(text: str, max_tokens: int) -> str:
-    """
-    Clip the number of tokens in a string to a maximum number of tokens.
-
-    Args:
-        text (str): The string to clip.
-        max_tokens (int): The maximum number of tokens allowed in the string.
-
-    Returns:
-        str: The clipped string.
-    """
-    if not text:
-        return text
-
-    try:
-        encoder = get_token_encoder()
-        num_input_tokens = len(encoder.encode(text))
-        if num_input_tokens <= max_tokens:
-            return text
-        num_chars = len(text)
-        chars_per_token = num_chars / num_input_tokens
-        num_output_chars = int(chars_per_token * max_tokens)
-        clipped_text = text[:num_output_chars]
-        return clipped_text
-    except Exception as e:
-        logging.warning(f"Failed to clip tokens: {e}")
-        return text
-
-
 def get_pr_multi_diffs(git_provider: GitProvider,
                       token_handler: TokenHandler,
                       model: str,