diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py index 76313ddf..affe7a46 100644 --- a/pr_agent/git_providers/bitbucket_provider.py +++ b/pr_agent/git_providers/bitbucket_provider.py @@ -1,4 +1,6 @@ +import difflib import json +import re from typing import Optional, Tuple from urllib.parse import urlparse @@ -72,24 +74,38 @@ class BitbucketProvider(GitProvider): post_parameters_list = [] for suggestion in code_suggestions: body = suggestion["body"] + original_suggestion = suggestion.get('original_suggestion', None) # needed for diff code + if original_suggestion: + try: + existing_code = original_suggestion['existing_code'].rstrip() + "\n" + improved_code = original_suggestion['improved_code'].rstrip() + "\n" + diff = difflib.unified_diff(existing_code.split('\n'), + improved_code.split('\n'), n=999) + patch_orig = "\n".join(diff) + patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n') + diff_code = f"\n\n```diff\n{patch.rstrip()}\n```" + # replace ```suggestion ... ``` with diff_code, using regex: + body = re.sub(r'```suggestion.*?```', diff_code, body, flags=re.DOTALL) + except Exception as e: + get_logger().exception(f"Bitbucket failed to get diff code for publishing, error: {e}") + continue + relevant_file = suggestion["relevant_file"] relevant_lines_start = suggestion["relevant_lines_start"] relevant_lines_end = suggestion["relevant_lines_end"] if not relevant_lines_start or relevant_lines_start == -1: - if get_settings().config.verbosity_level >= 2: - get_logger().exception( - f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}" - ) + get_logger().exception( + f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}" + ) continue if relevant_lines_end < relevant_lines_start: - if get_settings().config.verbosity_level >= 2: - get_logger().exception( - f"Failed to publish code suggestion, " - f"relevant_lines_end is {relevant_lines_end} and " - f"relevant_lines_start is {relevant_lines_start}" - ) + get_logger().exception( + f"Failed to publish code suggestion, " + f"relevant_lines_end is {relevant_lines_end} and " + f"relevant_lines_start is {relevant_lines_start}" + ) continue if relevant_lines_end > relevant_lines_start: @@ -113,8 +129,7 @@ class BitbucketProvider(GitProvider): self.publish_inline_comments(post_parameters_list) return True except Exception as e: - if get_settings().config.verbosity_level >= 2: - get_logger().error(f"Failed to publish code suggestion, error: {e}") + get_logger().error(f"Bitbucket failed to publish code suggestion, error: {e}") return False def publish_file_comments(self, file_comments: list) -> bool: @@ -122,7 +137,7 @@ class BitbucketProvider(GitProvider): def is_supported(self, capability: str) -> bool: if capability in ['get_issue_comments', 'publish_inline_comments', 'get_labels', 'gfm_markdown', - 'publish_file_comments']: + 'publish_file_comments']: return False return True diff --git a/pr_agent/git_providers/bitbucket_server_provider.py b/pr_agent/git_providers/bitbucket_server_provider.py index 9dc38099..4dfa8226 100644 --- a/pr_agent/git_providers/bitbucket_server_provider.py +++ b/pr_agent/git_providers/bitbucket_server_provider.py @@ -1,3 +1,6 @@ +import difflib +import re + from packaging.version import parse as parse_version from typing import Optional, Tuple from urllib.parse import quote_plus, urlparse @@ -67,24 +70,37 @@ class BitbucketServerProvider(GitProvider): post_parameters_list = [] for suggestion in code_suggestions: body = suggestion["body"] + original_suggestion = suggestion.get('original_suggestion', None) # needed for diff code + if original_suggestion: + try: + existing_code = original_suggestion['existing_code'].rstrip() + "\n" + improved_code = original_suggestion['improved_code'].rstrip() + "\n" + diff = difflib.unified_diff(existing_code.split('\n'), + improved_code.split('\n'), n=999) + patch_orig = "\n".join(diff) + patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n') + diff_code = f"\n\n```diff\n{patch.rstrip()}\n```" + # replace ```suggestion ... ``` with diff_code, using regex: + body = re.sub(r'```suggestion.*?```', diff_code, body, flags=re.DOTALL) + except Exception as e: + get_logger().exception(f"Bitbucket failed to get diff code for publishing, error: {e}") + continue relevant_file = suggestion["relevant_file"] relevant_lines_start = suggestion["relevant_lines_start"] relevant_lines_end = suggestion["relevant_lines_end"] if not relevant_lines_start or relevant_lines_start == -1: - if get_settings().config.verbosity_level >= 2: - get_logger().exception( - f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}" - ) + get_logger().warning( + f"Failed to publish code suggestion, relevant_lines_start is {relevant_lines_start}" + ) continue if relevant_lines_end < relevant_lines_start: - if get_settings().config.verbosity_level >= 2: - get_logger().exception( - f"Failed to publish code suggestion, " - f"relevant_lines_end is {relevant_lines_end} and " - f"relevant_lines_start is {relevant_lines_start}" - ) + get_logger().warning( + f"Failed to publish code suggestion, " + f"relevant_lines_end is {relevant_lines_end} and " + f"relevant_lines_start is {relevant_lines_start}" + ) continue if relevant_lines_end > relevant_lines_start: diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 1efa8744..cc8e158a 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -1,5 +1,8 @@ +import copy +import difflib import hashlib import itertools +import re import time import traceback from datetime import datetime @@ -11,6 +14,7 @@ from retry import retry from starlette_context import context from ..algo.file_filter import filter_ignored +from ..algo.git_patch_processing import extract_hunk_headers from ..algo.language_handler import is_valid_file from ..algo.types import EDIT_TYPE from ..algo.utils import (PRReviewHeader, Range, clip_tokens, @@ -415,7 +419,10 @@ class GithubProvider(GitProvider): Publishes code suggestions as comments on the PR. """ post_parameters_list = [] - for suggestion in code_suggestions: + + code_suggestions_validated = self.validate_comments_inside_hunks(code_suggestions) + + for suggestion in code_suggestions_validated: body = suggestion['body'] relevant_file = suggestion['relevant_file'] relevant_lines_start = suggestion['relevant_lines_start'] @@ -872,3 +879,100 @@ class GithubProvider(GitProvider): def calc_pr_statistics(self, pull_request_data: dict): return {} + + def validate_comments_inside_hunks(self, code_suggestions): + """ + validate that all committable comments are inside PR hunks - this is a must for committable comments in GitHub + """ + code_suggestions_copy = copy.deepcopy(code_suggestions) + diff_files = self.get_diff_files() + RE_HUNK_HEADER = re.compile( + r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)") + + # map file extensions to programming languages + language_extension_map_org = get_settings().language_extension_map_org + extension_to_language = {} + for language, extensions in language_extension_map_org.items(): + for ext in extensions: + extension_to_language[ext] = language + for file in diff_files: + extension_s = '.' + file.filename.rsplit('.')[-1] + language_name = "txt" + if extension_s and (extension_s in extension_to_language): + language_name = extension_to_language[extension_s] + file.language = language_name.lower() + + for suggestion in code_suggestions_copy: + try: + relevant_file_path = suggestion['relevant_file'] + for file in diff_files: + if file.filename == relevant_file_path: + + # generate on-demand the patches range for the relevant file + patch_str = file.patch + if not hasattr(file, 'patches_range'): + file.patches_range = [] + patch_lines = patch_str.splitlines() + for i, line in enumerate(patch_lines): + if line.startswith('@@'): + match = RE_HUNK_HEADER.match(line) + # identify hunk header + if match: + section_header, size1, size2, start1, start2 = extract_hunk_headers(match) + file.patches_range.append({'start': start2, 'end': start2 + size2 - 1}) + + patches_range = file.patches_range + comment_start_line = suggestion.get('relevant_lines_start', None) + comment_end_line = suggestion.get('relevant_lines_end', None) + original_suggestion = suggestion.get('original_suggestion', None) # needed for diff code + if not comment_start_line or not comment_end_line or not original_suggestion: + continue + + # check if the comment is inside a valid hunk + is_valid_hunk = False + min_distance = float('inf') + patch_range_min = None + # find the hunk that contains the comment, or the closest one + for i, patch_range in enumerate(patches_range): + d1 = comment_start_line - patch_range['start'] + d2 = patch_range['end'] - comment_end_line + if d1 >= 0 and d2 >= 0: # found a valid hunk + is_valid_hunk = True + min_distance = 0 + patch_range_min = patch_range + break + elif d1 * d2 <= 0: # comment is possibly inside the hunk + d1_clip = abs(min(0, d1)) + d2_clip = abs(min(0, d2)) + d = max(d1_clip, d2_clip) + if d < min_distance: + patch_range_min = patch_range + min_distance = min(min_distance, d) + if not is_valid_hunk: + if min_distance < 10: # 10 lines - a reasonable distance to consider the comment inside the hunk + # make the suggestion non-committable, yet multi line + suggestion['relevant_lines_start'] = max(suggestion['relevant_lines_start'], patch_range_min['start']) + suggestion['relevant_lines_end'] = min(suggestion['relevant_lines_end'], patch_range_min['end']) + body = suggestion['body'].strip() + + # present new diff code in collapsible + existing_code = original_suggestion['existing_code'].rstrip() + "\n" + improved_code = original_suggestion['improved_code'].rstrip() + "\n" + diff = difflib.unified_diff(existing_code.split('\n'), + improved_code.split('\n'), n=999) + patch_orig = "\n".join(diff) + patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n') + diff_code = f"\n\n
New proposed code:\n\n```diff\n{patch.rstrip()}\n```" + # replace ```suggestion ... ``` with diff_code, using regex: + body = re.sub(r'```suggestion.*?```', diff_code, body, flags=re.DOTALL) + body += "\n\n
" + suggestion['body'] = body + get_logger().info(f"Comment was moved to a valid hunk, " + f"start_line={suggestion['relevant_lines_start']}, end_line={suggestion['relevant_lines_end']}, file={file.filename}") + else: + get_logger().error(f"Comment is not inside a valid hunk, " + f"start_line={suggestion['relevant_lines_start']}, end_line={suggestion['relevant_lines_end']}, file={file.filename}") + except Exception as e: + get_logger().error(f"Failed to process patch for committable comment, error: {e}") + return code_suggestions_copy + diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index 6a7b85c3..7489a1a2 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -1,3 +1,4 @@ +import difflib import hashlib import re from typing import Optional, Tuple @@ -278,20 +279,23 @@ class GitLabProvider(GitProvider): new_code_snippet = original_suggestion['improved_code'] content = original_suggestion['suggestion_content'] label = original_suggestion['label'] - if 'score' in original_suggestion: - score = original_suggestion['score'] - else: - score = 7 + score = original_suggestion.get('score', 7) if hasattr(self, 'main_language'): language = self.main_language else: language = '' link = self.get_line_link(relevant_file, line_start, line_end) - body_fallback =f"**Suggestion:** {content} [{label}, importance: {score}]\n___\n" - body_fallback +=f"\n\nReplace lines ([{line_start}-{line_end}]({link}))\n\n```{language}\n{old_code_snippet}\n````\n\n" - body_fallback +=f"with\n\n```{language}\n{new_code_snippet}\n````" - body_fallback += f"\n\n___\n\n`(Cannot implement this suggestion directly, as gitlab API does not enable committing to a non -+ line in a PR)`" + body_fallback =f"**Suggestion:** {content} [{label}, importance: {score}]\n\n" + body_fallback +=f"\n\n
[{target_file.filename} [{line_start}-{line_end}]]({link}):\n\n" + body_fallback += f"\n\n___\n\n`(Cannot implement directly - GitLab API allows committable suggestions strictly on MR diff lines)`" + body_fallback+="
\n\n" + diff_patch = difflib.unified_diff(old_code_snippet.split('\n'), + new_code_snippet.split('\n'), n=999) + patch_orig = "\n".join(diff_patch) + patch = "\n".join(patch_orig.splitlines()[5:]).strip('\n') + diff_code = f"\n\n```diff\n{patch.rstrip()}\n```" + body_fallback += diff_code # Create a general note on the file in the MR self.mr.notes.create({ @@ -304,6 +308,7 @@ class GitLabProvider(GitProvider): 'file_path': f'{target_file.filename}', } }) + get_logger().debug(f"Created fallback comment in MR {self.id_mr} with position {pos_obj}") # get_logger().debug( # f"Failed to create comment in MR {self.id_mr} with position {pos_obj} (probably not a '+' line)")