From 2c496b9d4eff032cfca3bc68a84989c90100dfec Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 1 Sep 2024 08:38:26 +0300 Subject: [PATCH 1/3] fix: improve PR patch decoding and clean up Bitbucket provider code - Add fallback encodings for PR patch decoding to handle non-UTF-8 encodings. - Update logging messages for better clarity. - Remove unnecessary blank lines and fix minor formatting issues. - Ensure full files are retrieved in `get_diff_files` method. --- pr_agent/git_providers/bitbucket_provider.py | 51 +++++++++++++------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py index dd9bfc4c..28e94d67 100644 --- a/pr_agent/git_providers/bitbucket_provider.py +++ b/pr_agent/git_providers/bitbucket_provider.py @@ -121,7 +121,7 @@ class BitbucketProvider(GitProvider): def is_supported(self, capability: str) -> bool: if capability in ['get_issue_comments', 'publish_inline_comments', 'get_labels', 'gfm_markdown', - 'publish_file_comments']: + 'publish_file_comments']: return False return True @@ -142,7 +142,6 @@ class BitbucketProvider(GitProvider): self.git_files = [_gef_filename(diff) for diff in self.pr.diffstat()] return self.git_files - def get_diff_files(self) -> list[FilePatchInfo]: if self.diff_files: return self.diff_files @@ -164,8 +163,25 @@ class BitbucketProvider(GitProvider): pass # get the pr patches - pr_patch = self.pr.diff() - diff_split = ["diff --git" + x for x in pr_patch.split("diff --git") if x.strip()] + try: + pr_patches = self.pr.diff() + except Exception as e: + # Try different encodings if UTF-8 fails + get_logger().warning(f"Failed to decode PR patch with utf-8, error: {e}") + encodings_to_try = ['iso-8859-1', 'latin-1', 'ascii', 'utf-16'] + pr_patches = None + for encoding in encodings_to_try: + try: + pr_patches = self.pr.diff(encoding=encoding) + get_logger().info(f"Successfully decoded PR patch with encoding {encoding}") + break + except UnicodeDecodeError: + continue + + if pr_patches is None: + raise ValueError(f"Failed to decode PR patch with encodings {encodings_to_try}") + + diff_split = ["diff --git" + x for x in pr_patches.split("diff --git") if x.strip()] # filter all elements of 'diff_split' that are of indices in 'diffs_original' that are not in 'diffs' if len(diff_split) > len(diffs) and len(diffs_original) == len(diff_split): diff_split = [diff_split[i] for i in range(len(diff_split)) if diffs_original[i] in diffs] @@ -196,12 +212,13 @@ class BitbucketProvider(GitProvider): diff_split[i] = "" get_logger().info(f"Disregarding empty diff for file {_gef_filename(diffs[i])}") else: - get_logger().error(f"Error - failed to get diff for file {_gef_filename(diffs[i])}") + get_logger().warning(f"Bitbucket failed to get diff for file {_gef_filename(diffs[i])}") diff_split[i] = "" invalid_files_names = [] diff_files = [] counter_valid = 0 + # get full files for index, diff in enumerate(diffs): file_path = _gef_filename(diff) if not is_valid_file(file_path): @@ -316,11 +333,13 @@ class BitbucketProvider(GitProvider): get_logger().exception(f"Failed to remove comment, error: {e}") # function to create_inline_comment - def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, absolute_position: int = None): + def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, + absolute_position: int = None): body = self.limit_output_characters(body, self.max_comment_length) position, absolute_position = find_line_number_of_relevant_line_in_file(self.get_diff_files(), - relevant_file.strip('`'), - relevant_line_in_file, absolute_position) + relevant_file.strip('`'), + relevant_line_in_file, + absolute_position) if position == -1: if get_settings().config.verbosity_level >= 2: get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}") @@ -330,10 +349,9 @@ class BitbucketProvider(GitProvider): path = relevant_file.strip() return dict(body=body, path=path, position=absolute_position) if subject_type == "LINE" else {} - def publish_inline_comment(self, comment: str, from_line: int, file: str, original_suggestion=None): comment = self.limit_output_characters(comment, self.max_comment_length) - payload = json.dumps( { + payload = json.dumps({ "content": { "raw": comment, }, @@ -378,10 +396,10 @@ class BitbucketProvider(GitProvider): for comment in comments: if 'position' in comment: self.publish_inline_comment(comment['body'], comment['position'], comment['path']) - elif 'start_line' in comment: # multi-line comment + elif 'start_line' in comment: # multi-line comment # note that bitbucket does not seem to support range - only a comment on a single line - https://community.developer.atlassian.com/t/api-post-endpoint-for-inline-pull-request-comments/60452 self.publish_inline_comment(comment['body'], comment['start_line'], comment['path']) - elif 'line' in comment: # single-line comment + elif 'line' in comment: # single-line comment self.publish_inline_comment(comment['body'], comment['line'], comment['path']) else: get_logger().error(f"Could not publish inline comment {comment}") @@ -465,7 +483,6 @@ class BitbucketProvider(GitProvider): except Exception: return "" - def create_or_update_pr_file(self, file_path: str, branch: str, contents="", message="") -> None: url = (f"https://api.bitbucket.org/2.0/repositories/{self.workspace_slug}/{self.repo_slug}/src/") if not message: @@ -473,12 +490,12 @@ class BitbucketProvider(GitProvider): message = f"Update {file_path}" else: message = f"Create {file_path}" - files={file_path: contents} - data={ + files = {file_path: contents} + data = { "message": message, "branch": branch } - headers = {'Authorization':self.headers['Authorization']} if 'Authorization' in self.headers else {} + headers = {'Authorization': self.headers['Authorization']} if 'Authorization' in self.headers else {} try: requests.request("POST", url, headers=headers, data=data, files=files) except Exception: @@ -503,7 +520,7 @@ class BitbucketProvider(GitProvider): "description": description, "title": pr_title - }) + }) response = requests.request("PUT", self.bitbucket_pull_request_api_url, headers=self.headers, data=payload) try: From 5432469ef6377f8100f8e7ba03a9312ee780fa5c Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 1 Sep 2024 08:39:29 +0300 Subject: [PATCH 2/3] fix: ensure non-empty lines are processed correctly in git patch handling --- pr_agent/algo/git_patch_processing.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py index 0d85a4d3..18c617fc 100644 --- a/pr_agent/algo/git_patch_processing.py +++ b/pr_agent/algo/git_patch_processing.py @@ -164,9 +164,10 @@ def omit_deletion_hunks(patch_lines) -> str: inside_hunk = True else: temp_hunk.append(line) - edit_type = line[0] - if edit_type == '+': - add_hunk = True + if line: + edit_type = line[0] + if edit_type == '+': + add_hunk = True if inside_hunk and add_hunk: added_patched.extend(temp_hunk) From 9e063bf48a187206e85728abfe7d435a74ff0479 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 1 Sep 2024 08:47:23 +0300 Subject: [PATCH 3/3] fix: handle missing score in code suggestions and adjust indentation in pr_code_suggestions.py --- pr_agent/tools/pr_code_suggestions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py index f374a08f..25f51295 100644 --- a/pr_agent/tools/pr_code_suggestions.py +++ b/pr_agent/tools/pr_code_suggestions.py @@ -104,8 +104,8 @@ class PRCodeSuggestions: if not data: data = {"code_suggestions": []} - if (data is None or 'code_suggestions' not in data or not data['code_suggestions'] - and get_settings().config.publish_output): + if (data is None or 'code_suggestions' not in data or not data['code_suggestions'] + and get_settings().config.publish_output): get_logger().warning('No code suggestions found for the PR.') pr_body = "## PR Code Suggestions ✨\n\nNo code suggestions found for the PR." get_logger().debug(f"PR output", artifact=pr_body) @@ -528,7 +528,7 @@ class PRCodeSuggestions: for i, prediction in enumerate(predictions["code_suggestions"]): try: if get_settings().pr_code_suggestions.self_reflect_on_suggestions: - score = int(prediction["score"]) + score = int(prediction.get("score", 1)) if score >= score_threshold: data["code_suggestions"].append(prediction) else: