From 39e5102a2e5abb5e83ed9e4b6b78d612449af67f Mon Sep 17 00:00:00 2001 From: mrT23 Date: Wed, 8 Nov 2023 12:47:18 +0200 Subject: [PATCH 1/8] fix added files --- pr_agent/algo/git_patch_processing.py | 5 ++-- pr_agent/algo/pr_processing.py | 31 +++++++++++++------- pr_agent/git_providers/bitbucket_provider.py | 19 +++++++----- pr_agent/git_providers/git_provider.py | 6 ++-- 4 files changed, 38 insertions(+), 23 deletions(-) diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py index 4c20ea48..92cf2466 100644 --- a/pr_agent/algo/git_patch_processing.py +++ b/pr_agent/algo/git_patch_processing.py @@ -3,6 +3,7 @@ from __future__ import annotations import re from pr_agent.config_loader import get_settings +from pr_agent.git_providers.git_provider import EDIT_TYPE from pr_agent.log import get_logger @@ -115,7 +116,7 @@ def omit_deletion_hunks(patch_lines) -> str: def handle_patch_deletions(patch: str, original_file_content_str: str, - new_file_content_str: str, file_name: str) -> str: + new_file_content_str: str, file_name: str, edit_type: EDIT_TYPE) -> str: """ Handle entire file or deletion patches. @@ -132,7 +133,7 @@ def handle_patch_deletions(patch: str, original_file_content_str: str, str: The modified patch with deletion hunks omitted. """ - if not new_file_content_str: + if not new_file_content_str and edit_type != EDIT_TYPE.ADDED: # logic for handling deleted files - don't show patch, just show that the file was deleted if get_settings().config.verbosity_level > 0: get_logger().info(f"Processing file: {file_name}, minimizing deletion file") diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 269093cb..49c4ee73 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -13,12 +13,14 @@ from pr_agent.algo.file_filter import filter_ignored from pr_agent.algo.token_handler import TokenHandler, get_token_encoder from pr_agent.algo.utils import get_max_tokens from pr_agent.config_loader import get_settings -from pr_agent.git_providers.git_provider import FilePatchInfo, GitProvider +from pr_agent.git_providers.git_provider import FilePatchInfo, GitProvider, EDIT_TYPE from pr_agent.log import get_logger DELETED_FILES_ = "Deleted files:\n" -MORE_MODIFIED_FILES_ = "More modified files:\n" +MORE_MODIFIED_FILES_ = "Additional modified files (not enough token budget to process):\n" + +ADDED_FILES_ = "Additional added files (not enough token budget to process):\n" OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD = 1000 OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600 @@ -68,10 +70,13 @@ def get_pr_diff(git_provider: GitProvider, token_handler: TokenHandler, model: s return "\n".join(patches_extended) # if we are over the limit, start pruning - patches_compressed, modified_file_names, deleted_file_names = \ + patches_compressed, modified_file_names, deleted_file_names, added_file_names = \ pr_generate_compressed_diff(pr_languages, token_handler, model, add_line_numbers_to_hunks) final_diff = "\n".join(patches_compressed) + if added_file_names: + added_list_str = ADDED_FILES_ + "\n".join(added_file_names) + final_diff = final_diff + "\n\n" + added_list_str if modified_file_names: modified_list_str = MORE_MODIFIED_FILES_ + "\n".join(modified_file_names) final_diff = final_diff + "\n\n" + modified_list_str @@ -122,7 +127,7 @@ def pr_generate_extended_diff(pr_languages: list, def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, model: str, - convert_hunks_to_line_numbers: bool) -> Tuple[list, list, list]: + convert_hunks_to_line_numbers: bool) -> Tuple[list, list, list, list]: """ Generate a compressed diff string for a pull request, using diff minimization techniques to reduce the number of tokens used. @@ -148,6 +153,7 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo """ patches = [] + added_files_list = [] modified_files_list = [] deleted_files_list = [] # sort each one of the languages in top_langs by the number of tokens in the diff @@ -165,7 +171,7 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo # removing delete-only hunks patch = handle_patch_deletions(patch, original_file_content_str, - new_file_content_str, file.filename) + new_file_content_str, file.filename, file.edit_type) if patch is None: if not deleted_files_list: total_tokens += token_handler.count_tokens(DELETED_FILES_) @@ -190,10 +196,15 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo # until we meet the requirements if get_settings().config.verbosity_level >= 2: get_logger().warning(f"Patch too large, minimizing it, {file.filename}") - if not modified_files_list: - total_tokens += token_handler.count_tokens(MORE_MODIFIED_FILES_) - modified_files_list.append(file.filename) - total_tokens += token_handler.count_tokens(file.filename) + 1 + if file.edit_type == EDIT_TYPE.ADDED: + if not added_files_list: + total_tokens += token_handler.count_tokens(ADDED_FILES_) + added_files_list.append(file.filename) + else: + if not modified_files_list: + total_tokens += token_handler.count_tokens(MORE_MODIFIED_FILES_) + modified_files_list.append(file.filename) + total_tokens += token_handler.count_tokens(file.filename) + 1 continue if patch: @@ -206,7 +217,7 @@ def pr_generate_compressed_diff(top_langs: list, token_handler: TokenHandler, mo if get_settings().config.verbosity_level >= 2: get_logger().info(f"Tokens: {total_tokens}, last filename: {file.filename}") - return patches, modified_files_list, deleted_files_list + return patches, modified_files_list, deleted_files_list, added_files_list async def retry_with_fallback_models(f: Callable): diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py index ec7a66ed..1301805f 100644 --- a/pr_agent/git_providers/bitbucket_provider.py +++ b/pr_agent/git_providers/bitbucket_provider.py @@ -9,7 +9,7 @@ from starlette_context import context from ..algo.pr_processing import find_line_number_of_relevant_line_in_file from ..config_loader import get_settings from ..log import get_logger -from .git_provider import FilePatchInfo, GitProvider +from .git_provider import FilePatchInfo, GitProvider, EDIT_TYPE class BitbucketProvider(GitProvider): @@ -132,14 +132,17 @@ class BitbucketProvider(GitProvider): diff.old.get_data("links") ) new_file_content_str = self._get_pr_file_content(diff.new.get_data("links")) - diff_files.append( - FilePatchInfo( - original_file_content_str, - new_file_content_str, - diff_split[index], - diff.new.path, - ) + file_patch_canonic_structure = FilePatchInfo( + original_file_content_str, + new_file_content_str, + diff_split[index], + diff.new.path, ) + if diff.data['status'] == 'added': + file_patch_canonic_structure.edit_type = EDIT_TYPE.ADDED + diff_files.append(file_patch_canonic_structure) + + self.diff_files = diff_files return diff_files diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py index 3511c2a6..6598af5d 100644 --- a/pr_agent/git_providers/git_provider.py +++ b/pr_agent/git_providers/git_provider.py @@ -94,10 +94,10 @@ class GitProvider(ABC): def get_pr_description(self, *, full: bool = True) -> str: from pr_agent.config_loader import get_settings from pr_agent.algo.pr_processing import clip_tokens - max_tokens = get_settings().get("CONFIG.MAX_DESCRIPTION_TOKENS", None) + max_tokens_description = get_settings().get("CONFIG.MAX_DESCRIPTION_TOKENS", None) description = self.get_pr_description_full() if full else self.get_user_description() - if max_tokens: - return clip_tokens(description, max_tokens) + if max_tokens_description: + return clip_tokens(description, max_tokens_description) return description def get_user_description(self) -> str: From c5bc4b44ff24858382fd98754868ac4fc9561128 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Wed, 8 Nov 2023 12:51:30 +0200 Subject: [PATCH 2/8] fix added files --- pr_agent/algo/git_patch_processing.py | 2 +- pr_agent/algo/pr_processing.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py index 92cf2466..2978ac4a 100644 --- a/pr_agent/algo/git_patch_processing.py +++ b/pr_agent/algo/git_patch_processing.py @@ -116,7 +116,7 @@ def omit_deletion_hunks(patch_lines) -> str: def handle_patch_deletions(patch: str, original_file_content_str: str, - new_file_content_str: str, file_name: str, edit_type: EDIT_TYPE) -> str: + new_file_content_str: str, file_name: str, edit_type: EDIT_TYPE = EDIT_TYPE.MODIFIED) -> str: """ Handle entire file or deletion patches. diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 49c4ee73..41c6ec74 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -408,7 +408,7 @@ def get_pr_multi_diffs(git_provider: GitProvider, continue # Remove delete-only hunks - patch = handle_patch_deletions(patch, original_file_content_str, new_file_content_str, file.filename) + patch = handle_patch_deletions(patch, original_file_content_str, new_file_content_str, file.filename, file.edit_type) if patch is None: continue From 69b68b78f5a8d873cad2e3a0715c6c905ec5abc2 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Wed, 8 Nov 2023 14:17:59 +0200 Subject: [PATCH 3/8] s --- pr_agent/algo/git_patch_processing.py | 2 +- pr_agent/algo/pr_processing.py | 4 ++-- pr_agent/git_providers/git_provider.py | 3 ++- pr_agent/git_providers/github_provider.py | 8 ++++++-- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py index 2978ac4a..480387fa 100644 --- a/pr_agent/algo/git_patch_processing.py +++ b/pr_agent/algo/git_patch_processing.py @@ -116,7 +116,7 @@ def omit_deletion_hunks(patch_lines) -> str: def handle_patch_deletions(patch: str, original_file_content_str: str, - new_file_content_str: str, file_name: str, edit_type: EDIT_TYPE = EDIT_TYPE.MODIFIED) -> str: + new_file_content_str: str, file_name: str, edit_type: EDIT_TYPE = EDIT_TYPE.UNKNOWN) -> str: """ Handle entire file or deletion patches. diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 41c6ec74..e5b6f59e 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -18,9 +18,9 @@ from pr_agent.log import get_logger DELETED_FILES_ = "Deleted files:\n" -MORE_MODIFIED_FILES_ = "Additional modified files (not enough token budget to process):\n" +MORE_MODIFIED_FILES_ = "Additional modified files (insufficient token budget to process):\n" -ADDED_FILES_ = "Additional added files (not enough token budget to process):\n" +ADDED_FILES_ = "Additional added files (insufficient token budget to process):\n" OUTPUT_BUFFER_TOKENS_SOFT_THRESHOLD = 1000 OUTPUT_BUFFER_TOKENS_HARD_THRESHOLD = 600 diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py index 6598af5d..01308f45 100644 --- a/pr_agent/git_providers/git_provider.py +++ b/pr_agent/git_providers/git_provider.py @@ -13,6 +13,7 @@ class EDIT_TYPE(Enum): DELETED = 2 MODIFIED = 3 RENAMED = 4 + UNKNOWN = 5 @dataclass @@ -22,7 +23,7 @@ class FilePatchInfo: patch: str filename: str tokens: int = -1 - edit_type: EDIT_TYPE = EDIT_TYPE.MODIFIED + edit_type: EDIT_TYPE = EDIT_TYPE.UNKNOWN old_filename: str = None diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 5ed35d04..d9b7c9b1 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -13,7 +13,7 @@ from ..algo.utils import load_large_diff from ..config_loader import get_settings from ..log import get_logger from ..servers.utils import RateLimitExceeded -from .git_provider import FilePatchInfo, GitProvider, IncrementalPR +from .git_provider import FilePatchInfo, GitProvider, IncrementalPR, EDIT_TYPE class GithubProvider(GitProvider): @@ -129,7 +129,11 @@ class GithubProvider(GitProvider): if not patch: patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str) - diff_files.append(FilePatchInfo(original_file_content_str, new_file_content_str, patch, file.filename)) + edit_type = EDIT_TYPE.MODIFIED + if file.status == 'added': + edit_type = EDIT_TYPE.ADDED + file_patch_canonical_structure = FilePatchInfo(original_file_content_str, new_file_content_str, patch, file.filename, edit_type=edit_type) + diff_files.append(file_patch_canonical_structure) self.diff_files = diff_files return diff_files From 10ec0a1812ece1bae3d4f5f130bc925dc9643bde Mon Sep 17 00:00:00 2001 From: mrT23 Date: Wed, 8 Nov 2023 14:21:03 +0200 Subject: [PATCH 4/8] s --- pr_agent/git_providers/github_provider.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index d9b7c9b1..cede29fd 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -129,10 +129,19 @@ class GithubProvider(GitProvider): if not patch: patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str) - edit_type = EDIT_TYPE.MODIFIED if file.status == 'added': edit_type = EDIT_TYPE.ADDED - file_patch_canonical_structure = FilePatchInfo(original_file_content_str, new_file_content_str, patch, file.filename, edit_type=edit_type) + elif file.status == 'removed': + edit_type = EDIT_TYPE.DELETED + elif file.status == 'renamed': + edit_type = EDIT_TYPE.RENAMED + elif file.status == 'modified': + edit_type = EDIT_TYPE.MODIFIED + else: + get_logger().error(f"Unknown edit type: {file.status}") + edit_type = EDIT_TYPE.UNKNOWN + file_patch_canonical_structure = FilePatchInfo(original_file_content_str, new_file_content_str, patch, + file.filename, edit_type=edit_type) diff_files.append(file_patch_canonical_structure) self.diff_files = diff_files From 0293412a4263009babc1f43cc8fec1475d7afef4 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Wed, 8 Nov 2023 14:31:08 +0200 Subject: [PATCH 5/8] s --- pr_agent/algo/file_filter.py | 2 +- pr_agent/git_providers/bitbucket_provider.py | 7 +++++++ pr_agent/git_providers/git_provider.py | 2 ++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pr_agent/algo/file_filter.py b/pr_agent/algo/file_filter.py index cc466f57..32c61155 100644 --- a/pr_agent/algo/file_filter.py +++ b/pr_agent/algo/file_filter.py @@ -23,7 +23,7 @@ def filter_ignored(files): # keep filenames that _don't_ match the ignore regex for r in compiled_patterns: - files = [f for f in files if not r.match(f.filename)] + files = [f for f in files if (f.filename and not r.match(f.filename))] except Exception as e: print(f"Could not filter file list: {e}") diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py index 1301805f..155b03ec 100644 --- a/pr_agent/git_providers/bitbucket_provider.py +++ b/pr_agent/git_providers/bitbucket_provider.py @@ -138,8 +138,15 @@ class BitbucketProvider(GitProvider): diff_split[index], diff.new.path, ) + if diff.data['status'] == 'added': file_patch_canonic_structure.edit_type = EDIT_TYPE.ADDED + elif diff.data['status'] == 'removed': + file_patch_canonic_structure.edit_type = EDIT_TYPE.DELETED + elif diff.data['status'] == 'modified': + file_patch_canonic_structure.edit_type = EDIT_TYPE.MODIFIED + elif diff.data['status'] == 'renamed': + file_patch_canonic_structure.edit_type = EDIT_TYPE.RENAMED diff_files.append(file_patch_canonic_structure) diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py index 01308f45..59bf5956 100644 --- a/pr_agent/git_providers/git_provider.py +++ b/pr_agent/git_providers/git_provider.py @@ -154,6 +154,8 @@ def get_main_pr_language(languages, files) -> str: # validate that the specific commit uses the main language extension_list = [] for file in files: + if not file: + continue if isinstance(file, str): file = FilePatchInfo(base_file=None, head_file=None, patch=None, filename=file) extension_list.append(file.filename.rsplit('.')[-1]) From a1e32d833157708456b7f1ad074abd5ab6786f7e Mon Sep 17 00:00:00 2001 From: mrT23 Date: Wed, 8 Nov 2023 14:36:59 +0200 Subject: [PATCH 6/8] s --- pr_agent/git_providers/bitbucket_provider.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py index 155b03ec..28754ff9 100644 --- a/pr_agent/git_providers/bitbucket_provider.py +++ b/pr_agent/git_providers/bitbucket_provider.py @@ -298,6 +298,11 @@ class BitbucketProvider(GitProvider): }) response = requests.request("PUT", self.bitbucket_pull_request_api_url, headers=self.headers, data=payload) + try: + if response.status_code != 200: + get_logger().info(f"Failed to update description, error code: {response.status_code}") + except: + pass return response # bitbucket does not support labels From 49725e92f2d8c3d8a69cad83d16da7827d74d449 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Wed, 8 Nov 2023 14:41:15 +0200 Subject: [PATCH 7/8] s --- pr_agent/settings/pr_reviewer_prompts.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index 0ac1145f..103d5e14 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -152,7 +152,8 @@ PR Analysis: Focused PR: no, because ... {%- endif %} {%- if require_estimate_effort_to_review %} - Estimated effort to review [1-5]: 3, because ... + Estimated effort to review [1-5]: |- + 3, because ... {%- endif %} PR Feedback: General PR suggestions: |- From c2bec614e518bba563c592318aab4c59d3107a11 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Wed, 8 Nov 2023 14:46:11 +0200 Subject: [PATCH 8/8] s --- pr_agent/algo/utils.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index ecaf4aa8..e5d13475 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -328,20 +328,26 @@ def set_custom_labels(variables): variables["custom_labels_examples"] = f" - {list(labels.keys())[0]}" -def get_user_labels(current_labels): - ## Only keep labels that has been added by the user - if current_labels is None: - current_labels = [] - user_labels = [] - for label in current_labels: - if label in ['Bug fix', 'Tests', 'Refactoring', 'Enhancement', 'Documentation', 'Other']: - continue - if get_settings().config.enable_custom_labels: - if label in get_settings().custom_labels: +def get_user_labels(current_labels: List[str] = None): + """ + Only keep labels that has been added by the user + """ + try: + if current_labels is None: + current_labels = [] + user_labels = [] + for label in current_labels: + if label.lower() in ['bug fix', 'tests', 'refactoring', 'enhancement', 'documentation', 'other']: continue - user_labels.append(label) - if user_labels: - get_logger().info(f"Keeping user labels: {user_labels}") + if get_settings().config.enable_custom_labels: + if label in get_settings().custom_labels: + continue + user_labels.append(label) + if user_labels: + get_logger().info(f"Keeping user labels: {user_labels}") + except Exception as e: + get_logger().exception(f"Failed to get user labels: {e}") + return current_labels return user_labels