From 2c496b9d4eff032cfca3bc68a84989c90100dfec Mon Sep 17 00:00:00 2001 From: mrT23 Date: Sun, 1 Sep 2024 08:38:26 +0300 Subject: [PATCH] fix: improve PR patch decoding and clean up Bitbucket provider code - Add fallback encodings for PR patch decoding to handle non-UTF-8 encodings. - Update logging messages for better clarity. - Remove unnecessary blank lines and fix minor formatting issues. - Ensure full files are retrieved in `get_diff_files` method. --- pr_agent/git_providers/bitbucket_provider.py | 51 +++++++++++++------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py index dd9bfc4c..28e94d67 100644 --- a/pr_agent/git_providers/bitbucket_provider.py +++ b/pr_agent/git_providers/bitbucket_provider.py @@ -121,7 +121,7 @@ class BitbucketProvider(GitProvider): def is_supported(self, capability: str) -> bool: if capability in ['get_issue_comments', 'publish_inline_comments', 'get_labels', 'gfm_markdown', - 'publish_file_comments']: + 'publish_file_comments']: return False return True @@ -142,7 +142,6 @@ class BitbucketProvider(GitProvider): self.git_files = [_gef_filename(diff) for diff in self.pr.diffstat()] return self.git_files - def get_diff_files(self) -> list[FilePatchInfo]: if self.diff_files: return self.diff_files @@ -164,8 +163,25 @@ class BitbucketProvider(GitProvider): pass # get the pr patches - pr_patch = self.pr.diff() - diff_split = ["diff --git" + x for x in pr_patch.split("diff --git") if x.strip()] + try: + pr_patches = self.pr.diff() + except Exception as e: + # Try different encodings if UTF-8 fails + get_logger().warning(f"Failed to decode PR patch with utf-8, error: {e}") + encodings_to_try = ['iso-8859-1', 'latin-1', 'ascii', 'utf-16'] + pr_patches = None + for encoding in encodings_to_try: + try: + pr_patches = self.pr.diff(encoding=encoding) + get_logger().info(f"Successfully decoded PR patch with encoding {encoding}") + break + except UnicodeDecodeError: + continue + + if pr_patches is None: + raise ValueError(f"Failed to decode PR patch with encodings {encodings_to_try}") + + diff_split = ["diff --git" + x for x in pr_patches.split("diff --git") if x.strip()] # filter all elements of 'diff_split' that are of indices in 'diffs_original' that are not in 'diffs' if len(diff_split) > len(diffs) and len(diffs_original) == len(diff_split): diff_split = [diff_split[i] for i in range(len(diff_split)) if diffs_original[i] in diffs] @@ -196,12 +212,13 @@ class BitbucketProvider(GitProvider): diff_split[i] = "" get_logger().info(f"Disregarding empty diff for file {_gef_filename(diffs[i])}") else: - get_logger().error(f"Error - failed to get diff for file {_gef_filename(diffs[i])}") + get_logger().warning(f"Bitbucket failed to get diff for file {_gef_filename(diffs[i])}") diff_split[i] = "" invalid_files_names = [] diff_files = [] counter_valid = 0 + # get full files for index, diff in enumerate(diffs): file_path = _gef_filename(diff) if not is_valid_file(file_path): @@ -316,11 +333,13 @@ class BitbucketProvider(GitProvider): get_logger().exception(f"Failed to remove comment, error: {e}") # function to create_inline_comment - def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, absolute_position: int = None): + def create_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str, + absolute_position: int = None): body = self.limit_output_characters(body, self.max_comment_length) position, absolute_position = find_line_number_of_relevant_line_in_file(self.get_diff_files(), - relevant_file.strip('`'), - relevant_line_in_file, absolute_position) + relevant_file.strip('`'), + relevant_line_in_file, + absolute_position) if position == -1: if get_settings().config.verbosity_level >= 2: get_logger().info(f"Could not find position for {relevant_file} {relevant_line_in_file}") @@ -330,10 +349,9 @@ class BitbucketProvider(GitProvider): path = relevant_file.strip() return dict(body=body, path=path, position=absolute_position) if subject_type == "LINE" else {} - def publish_inline_comment(self, comment: str, from_line: int, file: str, original_suggestion=None): comment = self.limit_output_characters(comment, self.max_comment_length) - payload = json.dumps( { + payload = json.dumps({ "content": { "raw": comment, }, @@ -378,10 +396,10 @@ class BitbucketProvider(GitProvider): for comment in comments: if 'position' in comment: self.publish_inline_comment(comment['body'], comment['position'], comment['path']) - elif 'start_line' in comment: # multi-line comment + elif 'start_line' in comment: # multi-line comment # note that bitbucket does not seem to support range - only a comment on a single line - https://community.developer.atlassian.com/t/api-post-endpoint-for-inline-pull-request-comments/60452 self.publish_inline_comment(comment['body'], comment['start_line'], comment['path']) - elif 'line' in comment: # single-line comment + elif 'line' in comment: # single-line comment self.publish_inline_comment(comment['body'], comment['line'], comment['path']) else: get_logger().error(f"Could not publish inline comment {comment}") @@ -465,7 +483,6 @@ class BitbucketProvider(GitProvider): except Exception: return "" - def create_or_update_pr_file(self, file_path: str, branch: str, contents="", message="") -> None: url = (f"https://api.bitbucket.org/2.0/repositories/{self.workspace_slug}/{self.repo_slug}/src/") if not message: @@ -473,12 +490,12 @@ class BitbucketProvider(GitProvider): message = f"Update {file_path}" else: message = f"Create {file_path}" - files={file_path: contents} - data={ + files = {file_path: contents} + data = { "message": message, "branch": branch } - headers = {'Authorization':self.headers['Authorization']} if 'Authorization' in self.headers else {} + headers = {'Authorization': self.headers['Authorization']} if 'Authorization' in self.headers else {} try: requests.request("POST", url, headers=headers, data=data, files=files) except Exception: @@ -503,7 +520,7 @@ class BitbucketProvider(GitProvider): "description": description, "title": pr_title - }) + }) response = requests.request("PUT", self.bitbucket_pull_request_api_url, headers=self.headers, data=payload) try: