diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py index 10d140b0..72c9aebf 100644 --- a/pr_agent/algo/git_patch_processing.py +++ b/pr_agent/algo/git_patch_processing.py @@ -3,7 +3,7 @@ from __future__ import annotations import re from pr_agent.config_loader import get_settings -from pr_agent.git_providers.git_provider import EDIT_TYPE +from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo from pr_agent.log import get_logger diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 19e16a8e..30738236 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -1,9 +1,7 @@ from __future__ import annotations -import difflib -import re import traceback -from typing import Any, Callable, List, Tuple +from typing import Callable, List, Tuple from github import RateLimitExceededException @@ -13,7 +11,8 @@ from pr_agent.algo.file_filter import filter_ignored from pr_agent.algo.token_handler import TokenHandler from pr_agent.algo.utils import get_max_tokens, ModelType from pr_agent.config_loader import get_settings -from pr_agent.git_providers.git_provider import FilePatchInfo, GitProvider, EDIT_TYPE +from pr_agent.git_providers.git_provider import GitProvider +from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo from pr_agent.log import get_logger DELETED_FILES_ = "Deleted files:\n" @@ -270,78 +269,6 @@ def _get_all_deployments(all_models: List[str]) -> List[str]: return all_deployments -def find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo], - relevant_file: str, - relevant_line_in_file: str, - absolute_position: int = None) -> Tuple[int, int]: - position = -1 - if absolute_position is None: - absolute_position = -1 - re_hunk_header = re.compile( - r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)") - - for file in diff_files: - if file.filename and (file.filename.strip() == relevant_file): - patch = file.patch - patch_lines = patch.splitlines() - delta = 0 - start1, size1, start2, size2 = 0, 0, 0, 0 - if absolute_position != -1: # matching absolute to relative - for i, line in enumerate(patch_lines): - # new hunk - if line.startswith('@@'): - delta = 0 - match = re_hunk_header.match(line) - start1, size1, start2, size2 = map(int, match.groups()[:4]) - elif not line.startswith('-'): - delta += 1 - - # - absolute_position_curr = start2 + delta - 1 - - if absolute_position_curr == absolute_position: - position = i - break - else: - # try to find the line in the patch using difflib, with some margin of error - matches_difflib: list[str | Any] = difflib.get_close_matches(relevant_line_in_file, - patch_lines, n=3, cutoff=0.93) - if len(matches_difflib) == 1 and matches_difflib[0].startswith('+'): - relevant_line_in_file = matches_difflib[0] - - - for i, line in enumerate(patch_lines): - if line.startswith('@@'): - delta = 0 - match = re_hunk_header.match(line) - start1, size1, start2, size2 = map(int, match.groups()[:4]) - elif not line.startswith('-'): - delta += 1 - - if relevant_line_in_file in line and line[0] != '-': - position = i - absolute_position = start2 + delta - 1 - break - - if position == -1 and relevant_line_in_file[0] == '+': - no_plus_line = relevant_line_in_file[1:].lstrip() - for i, line in enumerate(patch_lines): - if line.startswith('@@'): - delta = 0 - match = re_hunk_header.match(line) - start1, size1, start2, size2 = map(int, match.groups()[:4]) - elif not line.startswith('-'): - delta += 1 - - if no_plus_line in line and line[0] != '-': - # The model might add a '+' to the beginning of the relevant_line_in_file even if originally - # it's a context line - position = i - absolute_position = start2 + delta - 1 - break - return position, absolute_position - - def get_pr_multi_diffs(git_provider: GitProvider, token_handler: TokenHandler, model: str, diff --git a/pr_agent/algo/types.py b/pr_agent/algo/types.py new file mode 100644 index 00000000..045115b4 --- /dev/null +++ b/pr_agent/algo/types.py @@ -0,0 +1,23 @@ +from dataclasses import dataclass +from enum import Enum + + +class EDIT_TYPE(Enum): + ADDED = 1 + DELETED = 2 + MODIFIED = 3 + RENAMED = 4 + UNKNOWN = 5 + + +@dataclass +class FilePatchInfo: + base_file: str + head_file: str + patch: str + filename: str + tokens: int = -1 + edit_type: EDIT_TYPE = EDIT_TYPE.UNKNOWN + old_filename: str = None + num_plus_lines: int = -1 + num_minus_lines: int = -1 diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index e92c3219..f7b1f7cd 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -6,7 +6,7 @@ import re import textwrap from datetime import datetime from enum import Enum -from typing import Any, List +from typing import Any, List, Tuple import yaml from starlette_context import context @@ -14,6 +14,7 @@ from starlette_context import context from pr_agent.algo import MAX_TOKENS from pr_agent.algo.token_handler import get_token_encoder from pr_agent.config_loader import get_settings, global_settings +from pr_agent.algo.types import FilePatchInfo from pr_agent.log import get_logger class ModelType(str, Enum): @@ -487,4 +488,76 @@ def replace_code_tags(text): parts = text.split('`') for i in range(1, len(parts), 2): parts[i] = '' + parts[i] + '' - return ''.join(parts) \ No newline at end of file + return ''.join(parts) + + +def find_line_number_of_relevant_line_in_file(diff_files: List[FilePatchInfo], + relevant_file: str, + relevant_line_in_file: str, + absolute_position: int = None) -> Tuple[int, int]: + position = -1 + if absolute_position is None: + absolute_position = -1 + re_hunk_header = re.compile( + r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)") + + for file in diff_files: + if file.filename and (file.filename.strip() == relevant_file): + patch = file.patch + patch_lines = patch.splitlines() + delta = 0 + start1, size1, start2, size2 = 0, 0, 0, 0 + if absolute_position != -1: # matching absolute to relative + for i, line in enumerate(patch_lines): + # new hunk + if line.startswith('@@'): + delta = 0 + match = re_hunk_header.match(line) + start1, size1, start2, size2 = map(int, match.groups()[:4]) + elif not line.startswith('-'): + delta += 1 + + # + absolute_position_curr = start2 + delta - 1 + + if absolute_position_curr == absolute_position: + position = i + break + else: + # try to find the line in the patch using difflib, with some margin of error + matches_difflib: list[str | Any] = difflib.get_close_matches(relevant_line_in_file, + patch_lines, n=3, cutoff=0.93) + if len(matches_difflib) == 1 and matches_difflib[0].startswith('+'): + relevant_line_in_file = matches_difflib[0] + + + for i, line in enumerate(patch_lines): + if line.startswith('@@'): + delta = 0 + match = re_hunk_header.match(line) + start1, size1, start2, size2 = map(int, match.groups()[:4]) + elif not line.startswith('-'): + delta += 1 + + if relevant_line_in_file in line and line[0] != '-': + position = i + absolute_position = start2 + delta - 1 + break + + if position == -1 and relevant_line_in_file[0] == '+': + no_plus_line = relevant_line_in_file[1:].lstrip() + for i, line in enumerate(patch_lines): + if line.startswith('@@'): + delta = 0 + match = re_hunk_header.match(line) + start1, size1, start2, size2 = map(int, match.groups()[:4]) + elif not line.startswith('-'): + delta += 1 + + if no_plus_line in line and line[0] != '-': + # The model might add a '+' to the beginning of the relevant_line_in_file even if originally + # it's a context line + position = i + absolute_position = start2 + delta - 1 + break + return position, absolute_position diff --git a/pr_agent/git_providers/azuredevops_provider.py b/pr_agent/git_providers/azuredevops_provider.py index 17d87488..a46c0ab2 100644 --- a/pr_agent/git_providers/azuredevops_provider.py +++ b/pr_agent/git_providers/azuredevops_provider.py @@ -6,7 +6,8 @@ from ..log import get_logger from ..algo.language_handler import is_valid_file from ..algo.utils import clip_tokens, load_large_diff from ..config_loader import get_settings -from .git_provider import EDIT_TYPE, FilePatchInfo, GitProvider +from .git_provider import GitProvider +from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo AZURE_DEVOPS_AVAILABLE = True diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py index c98ebc05..c761d10b 100644 --- a/pr_agent/git_providers/bitbucket_provider.py +++ b/pr_agent/git_providers/bitbucket_provider.py @@ -6,10 +6,11 @@ import requests from atlassian.bitbucket import Cloud from starlette_context import context -from ..algo.pr_processing import find_line_number_of_relevant_line_in_file +from pr_agent.algo.types import FilePatchInfo, EDIT_TYPE +from ..algo.utils import find_line_number_of_relevant_line_in_file from ..config_loader import get_settings from ..log import get_logger -from .git_provider import FilePatchInfo, GitProvider, EDIT_TYPE +from .git_provider import GitProvider class BitbucketProvider(GitProvider): diff --git a/pr_agent/git_providers/bitbucket_server_provider.py b/pr_agent/git_providers/bitbucket_server_provider.py index 2d96120b..9798cd5e 100644 --- a/pr_agent/git_providers/bitbucket_server_provider.py +++ b/pr_agent/git_providers/bitbucket_server_provider.py @@ -6,9 +6,9 @@ import requests from atlassian.bitbucket import Bitbucket from starlette_context import context -from .git_provider import FilePatchInfo, GitProvider, EDIT_TYPE -from ..algo.pr_processing import find_line_number_of_relevant_line_in_file -from ..algo.utils import load_large_diff +from .git_provider import GitProvider +from pr_agent.algo.types import FilePatchInfo +from ..algo.utils import load_large_diff, find_line_number_of_relevant_line_in_file from ..config_loader import get_settings from ..log import get_logger diff --git a/pr_agent/git_providers/codecommit_provider.py b/pr_agent/git_providers/codecommit_provider.py index 3c7b7697..50398c17 100644 --- a/pr_agent/git_providers/codecommit_provider.py +++ b/pr_agent/git_providers/codecommit_provider.py @@ -5,9 +5,9 @@ from typing import List, Optional, Tuple from urllib.parse import urlparse from pr_agent.git_providers.codecommit_client import CodeCommitClient - +from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo from ..algo.utils import load_large_diff -from .git_provider import EDIT_TYPE, FilePatchInfo, GitProvider +from .git_provider import GitProvider from ..config_loader import get_settings from ..log import get_logger diff --git a/pr_agent/git_providers/gerrit_provider.py b/pr_agent/git_providers/gerrit_provider.py index f7dd05ac..a1491c78 100644 --- a/pr_agent/git_providers/gerrit_provider.py +++ b/pr_agent/git_providers/gerrit_provider.py @@ -13,7 +13,8 @@ import urllib3.util from git import Repo from pr_agent.config_loader import get_settings -from pr_agent.git_providers.git_provider import EDIT_TYPE, FilePatchInfo, GitProvider +from pr_agent.git_providers.git_provider import GitProvider +from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo from pr_agent.git_providers.local_git_provider import PullRequestMimic from pr_agent.log import get_logger diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py index f981d863..ae2109b5 100644 --- a/pr_agent/git_providers/git_provider.py +++ b/pr_agent/git_providers/git_provider.py @@ -1,35 +1,13 @@ from abc import ABC, abstractmethod -from dataclasses import dataclass # enum EDIT_TYPE (ADDED, DELETED, MODIFIED, RENAMED) -from enum import Enum from typing import Optional from pr_agent.config_loader import get_settings +from pr_agent.algo.types import FilePatchInfo from pr_agent.log import get_logger -class EDIT_TYPE(Enum): - ADDED = 1 - DELETED = 2 - MODIFIED = 3 - RENAMED = 4 - UNKNOWN = 5 - - -@dataclass -class FilePatchInfo: - base_file: str - head_file: str - patch: str - filename: str - tokens: int = -1 - edit_type: EDIT_TYPE = EDIT_TYPE.UNKNOWN - old_filename: str = None - num_plus_lines: int = -1 - num_minus_lines: int = -1 - - class GitProvider(ABC): @abstractmethod def is_supported(self, capability: str) -> bool: diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index aaf1f386..9b89973d 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -9,12 +9,12 @@ from retry import retry from starlette_context import context from ..algo.language_handler import is_valid_file -from ..algo.pr_processing import find_line_number_of_relevant_line_in_file -from ..algo.utils import load_large_diff, clip_tokens +from ..algo.utils import load_large_diff, clip_tokens, find_line_number_of_relevant_line_in_file from ..config_loader import get_settings from ..log import get_logger from ..servers.utils import RateLimitExceeded -from .git_provider import FilePatchInfo, GitProvider, IncrementalPR, EDIT_TYPE +from .git_provider import GitProvider, IncrementalPR +from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo class GithubProvider(GitProvider): diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index 4db37305..85525e6c 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -7,10 +7,10 @@ import gitlab from gitlab import GitlabGetError from ..algo.language_handler import is_valid_file -from ..algo.pr_processing import find_line_number_of_relevant_line_in_file -from ..algo.utils import load_large_diff, clip_tokens +from ..algo.utils import load_large_diff, clip_tokens, find_line_number_of_relevant_line_in_file from ..config_loader import get_settings -from .git_provider import EDIT_TYPE, FilePatchInfo, GitProvider +from .git_provider import GitProvider +from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo from ..log import get_logger diff --git a/pr_agent/git_providers/local_git_provider.py b/pr_agent/git_providers/local_git_provider.py index 3d45d35c..83c50791 100644 --- a/pr_agent/git_providers/local_git_provider.py +++ b/pr_agent/git_providers/local_git_provider.py @@ -5,7 +5,8 @@ from typing import List from git import Repo from pr_agent.config_loader import _find_repository_root, get_settings -from pr_agent.git_providers.git_provider import EDIT_TYPE, FilePatchInfo, GitProvider +from pr_agent.git_providers.git_provider import GitProvider +from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo from pr_agent.log import get_logger diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 5c7a35eb..7ab2f8cb 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -9,11 +9,11 @@ verbosity_level=0 # 0,1,2 use_extra_bad_extensions=false use_repo_settings_file=true use_global_settings_file=true -ai_timeout=180 +ai_timeout=90 max_description_tokens = 500 max_commits_tokens = 500 max_model_tokens = 32000 # Limits the maximum number of tokens that can be used by any model, regardless of the model's default capabilities. -patch_extra_lines = 3 +patch_extra_lines = 1 secret_provider="google_cloud_storage" cli_mode=false diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py index fb725848..5fc45619 100644 --- a/pr_agent/tools/pr_code_suggestions.py +++ b/pr_agent/tools/pr_code_suggestions.py @@ -325,7 +325,7 @@ class PRCodeSuggestions: pr_body += "" header = f"Suggestions" - delta = 77 + delta = 75 header += "  " * delta pr_body += f"""""" pr_body += """""" diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py index 5eb78549..4916de48 100644 --- a/pr_agent/tools/pr_description.py +++ b/pr_agent/tools/pr_description.py @@ -9,7 +9,7 @@ from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models from pr_agent.algo.token_handler import TokenHandler -from pr_agent.algo.utils import load_yaml, set_custom_labels, get_user_labels +from pr_agent.algo.utils import load_yaml, set_custom_labels, get_user_labels, ModelType from pr_agent.config_loader import get_settings from pr_agent.git_providers import get_git_provider from pr_agent.git_providers.git_provider import get_main_pr_language @@ -80,7 +80,7 @@ class PRDescription: if get_settings().config.publish_output: self.git_provider.publish_comment("Preparing PR description...", is_temporary=True) - await retry_with_fallback_models(self._prepare_prediction) + await retry_with_fallback_models(self._prepare_prediction, ModelType.TURBO) # turbo model because larger context get_logger().info(f"Preparing answer {self.pr_id}") if self.prediction: @@ -363,7 +363,7 @@ class PRDescription: try: pr_body += "
{header}
" header = f"Relevant files" - delta = 77 + delta = 75 # header += "  " * delta pr_body += f"""""" pr_body += """""" @@ -379,8 +379,7 @@ class PRDescription: for filename, file_changes_title, file_change_description in list_tuples: filename = filename.replace("'", "`") filename_publish = filename.split("/")[-1] - file_changes_title_br = insert_br_after_x_chars(file_changes_title, x=(delta - 5), - new_line_char="\n\n") + file_changes_title_br = insert_br_after_x_chars(file_changes_title, x=(delta - 5)) file_changes_title_extended = file_changes_title_br.strip() + "" if len(file_changes_title_extended) < (delta - 5): file_changes_title_extended += "  " * ((delta - 5) - len(file_changes_title_extended)) @@ -428,48 +427,74 @@ class PRDescription: pass return pr_body -def insert_br_after_x_chars(text, x=70, new_line_char="
"): +def insert_br_after_x_chars(text, x=70): """ Insert
into a string after a word that increases its length above x characters. + Use proper HTML tags for code and new lines. """ if len(text) < x: return text - lines = text.splitlines() + # replace odd instances of ` with and even instances of ` with + text = replace_code_tags(text) + + # convert list items to
  • + if text.startswith("- "): + text = "
  • " + text[2:] + text = text.replace("\n- ", '
  • ').replace("\n - ", '
  • ') + + # convert new lines to
    + text = text.replace("\n", '
    ') + + # split text into lines + lines = text.split('
    ') words = [] - for i,line in enumerate(lines): + for i, line in enumerate(lines): words += line.split(' ') - if i]+>', '', string) + return len(no_html_string) - # words = text.split(' ') - - new_text = "" - current_length = 0 + new_text = [] is_inside_code = False + current_length = 0 for word in words: - # Check if adding this word exceeds x characters - if current_length + len(word) > x: - if not is_inside_code: - new_text += f"{new_line_char} " # Insert line break - current_length = 0 # Reset counter - else: - new_text += f"`{new_line_char} `" - # check if inside tag - if word.startswith("`") and not is_inside_code and not word.endswith("`"): + is_saved_word = False + if word == "" or word == "" or word == "
  • " or word == "
    ": + is_saved_word = True + if "" in word: is_inside_code = True - if word.endswith("`"): + if "" in word: is_inside_code = False - # Add the word to the new text - if word.endswith("\n"): - new_text += word - else: - new_text += word + " " - current_length += len(word) + 1 # Add 1 for the space + len_word = count_chars_without_html(word) + if not is_saved_word and (current_length + len_word > x): + if is_inside_code: + new_text.append("

    ") + else: + new_text.append("
    ") + current_length = 0 # Reset counter + new_text.append(word + " ") + if not is_saved_word: + current_length += len_word + 1 # Add 1 for the space - if word.endswith("\n"): + if word == "
  • " or word == "
    ": current_length = 0 - return new_text.strip() # Remove trailing space + + return ''.join(new_text).strip() + +def replace_code_tags(text): + """ + Replace odd instances of ` with and even instances of ` with + """ + parts = text.split('`') + for i in range(1, len(parts), 2): + parts[i] = '' + parts[i] + '' + return ''.join(parts) + diff --git a/tests/unittest/test_codecommit_provider.py b/tests/unittest/test_codecommit_provider.py index 6f187de7..56312d73 100644 --- a/tests/unittest/test_codecommit_provider.py +++ b/tests/unittest/test_codecommit_provider.py @@ -3,7 +3,7 @@ from unittest.mock import patch from pr_agent.git_providers.codecommit_provider import CodeCommitFile from pr_agent.git_providers.codecommit_provider import CodeCommitProvider from pr_agent.git_providers.codecommit_provider import PullRequestCCMimic -from pr_agent.git_providers.git_provider import EDIT_TYPE +from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo class TestCodeCommitFile: diff --git a/tests/unittest/test_convert_to_markdown.py b/tests/unittest/test_convert_to_markdown.py index 87f8f983..72d34656 100644 --- a/tests/unittest/test_convert_to_markdown.py +++ b/tests/unittest/test_convert_to_markdown.py @@ -1,5 +1,6 @@ # Generated by CodiumAI from pr_agent.algo.utils import convert_to_markdown +from pr_agent.tools.pr_description import insert_br_after_x_chars """ Code Analysis @@ -93,3 +94,27 @@ class TestConvertToMarkdown: } expected_output = '' assert convert_to_markdown(input_data).strip() == expected_output.strip() + + +class TestBR: + def test_br1(self): + file_change_description = '- Imported `FilePatchInfo` and `EDIT_TYPE` from `pr_agent.algo.types` instead of `pr_agent.git_providers.git_provider`.' + file_change_description_br = insert_br_after_x_chars(file_change_description) + expected_output = ('
  • Imported FilePatchInfo and EDIT_TYPE from ' + 'pr_agent.algo.types instead
    of ' + 'pr_agent.git_providers.git_provider.') + assert file_change_description_br == expected_output + # print("-----") + # print(file_change_description_br) + + def test_br2(self): + file_change_description = ('- Created a - new -class `ColorPaletteResourcesCollection ColorPaletteResourcesCollection ' + 'ColorPaletteResourcesCollection ColorPaletteResourcesCollection`') + file_change_description_br = insert_br_after_x_chars(file_change_description) + expected_output = ('
  • Created a - new -class ColorPaletteResourcesCollection
    ' + 'ColorPaletteResourcesCollection ColorPaletteResourcesCollection
    ' + 'ColorPaletteResourcesCollection
    ') + assert file_change_description_br == expected_output + # print("-----") + # print(file_change_description_br) + diff --git a/tests/unittest/test_find_line_number_of_relevant_line_in_file.py b/tests/unittest/test_find_line_number_of_relevant_line_in_file.py index 7488c6df..fcb028ca 100644 --- a/tests/unittest/test_find_line_number_of_relevant_line_in_file.py +++ b/tests/unittest/test_find_line_number_of_relevant_line_in_file.py @@ -1,8 +1,7 @@ # Generated by CodiumAI -from pr_agent.git_providers.git_provider import FilePatchInfo -from pr_agent.algo.pr_processing import find_line_number_of_relevant_line_in_file - +from pr_agent.algo.types import FilePatchInfo +from pr_agent.algo.utils import find_line_number_of_relevant_line_in_file import pytest
  • {header}