diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py index e7561152..c8b620fe 100644 --- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py +++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py @@ -106,6 +106,10 @@ class LiteLLMAIHandler(BaseAiHandler): deployment_id = self.deployment_id if self.azure: model = 'azure/' + model + if 'claude' in model and not system: + system = "\n" + get_logger().warning( + "Empty system prompt for claude model. Adding a newline character to prevent OpenAI API error.") messages = [{"role": "system", "content": system}, {"role": "user", "content": user}] if img_path: try: diff --git a/pr_agent/algo/language_handler.py b/pr_agent/algo/language_handler.py index 91419368..7c103998 100644 --- a/pr_agent/algo/language_handler.py +++ b/pr_agent/algo/language_handler.py @@ -14,7 +14,9 @@ def filter_bad_extensions(files): return [f for f in files if f.filename is not None and is_valid_file(f.filename, bad_extensions)] -def is_valid_file(filename, bad_extensions=None): +def is_valid_file(filename:str, bad_extensions=None) -> bool: + if not filename: + return False if not bad_extensions: bad_extensions = get_settings().bad_extensions.default if get_settings().config.use_extra_bad_extensions: diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index bcf6afca..bb648060 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -147,7 +147,7 @@ def convert_to_markdown_v2(output_data: dict, else: markdown_text += f"### {emoji} {key_nice}: {value}\n\n" elif 'relevant tests' in key_nice.lower(): - value = value.strip().lower() + value = str(value).strip().lower() if gfm_supported: markdown_text += f"" if is_value_no(value): @@ -674,14 +674,16 @@ def get_user_labels(current_labels: List[str] = None): Only keep labels that has been added by the user """ try: + enable_custom_labels = get_settings().config.get('enable_custom_labels', False) + custom_labels = get_settings().get('custom_labels', []) if current_labels is None: current_labels = [] user_labels = [] for label in current_labels: if label.lower() in ['bug fix', 'tests', 'enhancement', 'documentation', 'other']: continue - if get_settings().config.enable_custom_labels: - if label in get_settings().custom_labels: + if enable_custom_labels: + if label in custom_labels: continue user_labels.append(label) if user_labels: diff --git a/pr_agent/git_providers/bitbucket_provider.py b/pr_agent/git_providers/bitbucket_provider.py index 35d9f2d7..a76624dc 100644 --- a/pr_agent/git_providers/bitbucket_provider.py +++ b/pr_agent/git_providers/bitbucket_provider.py @@ -15,6 +15,12 @@ from ..log import get_logger from .git_provider import GitProvider +def _gef_filename(diff): + if diff.new.path: + return diff.new.path + return diff.old.path + + class BitbucketProvider(GitProvider): def __init__( self, pr_url: Optional[str] = None, incremental: Optional[bool] = False @@ -40,6 +46,7 @@ class BitbucketProvider(GitProvider): self.temp_comments = [] self.incremental = incremental self.diff_files = None + self.git_files = None if pr_url: self.set_pr(pr_url) self.bitbucket_comment_api_url = self.pr._BitbucketBase__data["links"]["comments"]["href"] @@ -123,7 +130,18 @@ class BitbucketProvider(GitProvider): self.pr = self._get_pr() def get_files(self): - return [diff.new.path for diff in self.pr.diffstat()] + try: + git_files = context.get("git_files", None) + if git_files: + return git_files + self.git_files = [_gef_filename(diff) for diff in self.pr.diffstat()] + context["git_files"] = self.git_files + return self.git_files + except Exception: + if not self.git_files: + self.git_files = [_gef_filename(diff) for diff in self.pr.diffstat()] + return self.git_files + def get_diff_files(self) -> list[FilePatchInfo]: if self.diff_files: @@ -172,14 +190,18 @@ class BitbucketProvider(GitProvider): diff_split_lines[5].startswith("@@"))): diff_split[i] = "\n".join(diff_split_lines[4:]) else: - get_logger().error(f"Error - failed to remove the bitbucket header from diff {i}") - break + if diffs[i].data.get('lines_added', 0) == 0 and diffs[i].data.get('lines_removed', 0) == 0: + diff_split[i] = "" + else: + get_logger().error(f"Error - failed to remove the bitbucket header from diff {i}") + break invalid_files_names = [] diff_files = [] for index, diff in enumerate(diffs): - if not is_valid_file(diff.new.path): - invalid_files_names.append(diff.new.path) + file_path = _gef_filename(diff) + if not is_valid_file(file_path): + invalid_files_names.append(file_path) continue try: @@ -200,7 +222,7 @@ class BitbucketProvider(GitProvider): original_file_content_str, new_file_content_str, diff_split[index], - diff.new.path, + file_path, ) if diff.data['status'] == 'added': diff --git a/pr_agent/secret_providers/google_cloud_storage_secret_provider.py b/pr_agent/secret_providers/google_cloud_storage_secret_provider.py index 2dfb71f6..8cbaebe3 100644 --- a/pr_agent/secret_providers/google_cloud_storage_secret_provider.py +++ b/pr_agent/secret_providers/google_cloud_storage_secret_provider.py @@ -22,7 +22,7 @@ class GoogleCloudStorageSecretProvider(SecretProvider): blob = self.bucket.blob(secret_name) return blob.download_as_string() except Exception as e: - get_logger().error(f"Failed to get secret {secret_name} from Google Cloud Storage: {e}") + get_logger().warning(f"Failed to get secret {secret_name} from Google Cloud Storage: {e}") return "" def store_secret(self, secret_name: str, secret_value: str): diff --git a/pr_agent/servers/bitbucket_app.py b/pr_agent/servers/bitbucket_app.py index 634b48bf..988c170b 100644 --- a/pr_agent/servers/bitbucket_app.py +++ b/pr_agent/servers/bitbucket_app.py @@ -108,13 +108,18 @@ async def handle_github_webhooks(background_tasks: BackgroundTasks, request: Req return "OK" except KeyError: get_logger().error("Failed to get actor type, check previous logs, this shouldn't happen.") + + # Get the username of the sender try: - owner = data["data"]["repository"]["owner"]["username"] - except Exception as e: - get_logger().error(f"Failed to get owner, will continue: {e}") - owner = "unknown" + username = data["data"]["actor"]["username"] + except KeyError: + try: + username = data["data"]["actor"]["display_name"] + except KeyError: + username = data["data"]["actor"]["nickname"] + log_context["sender"] = username + sender_id = data["data"]["actor"]["account_id"] - log_context["sender"] = owner log_context["sender_id"] = sender_id jwt_parts = input_jwt.split(".") claim_part = jwt_parts[1] diff --git a/pr_agent/servers/gitlab_webhook.py b/pr_agent/servers/gitlab_webhook.py index 2c525858..4a814e9f 100644 --- a/pr_agent/servers/gitlab_webhook.py +++ b/pr_agent/servers/gitlab_webhook.py @@ -87,6 +87,10 @@ async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request): if request.headers.get("X-Gitlab-Token") and secret_provider: request_token = request.headers.get("X-Gitlab-Token") secret = secret_provider.get_secret(request_token) + if not secret: + get_logger().warning(f"Empty secret retrieved, request_token: {request_token}") + return JSONResponse(status_code=status.HTTP_401_UNAUTHORIZED, + content=jsonable_encoder({"message": "unauthorized"})) try: secret_dict = json.loads(secret) gitlab_token = secret_dict["gitlab_token"] diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py index f7bb3315..c0f3c0bf 100644 --- a/pr_agent/tools/pr_code_suggestions.py +++ b/pr_agent/tools/pr_code_suggestions.py @@ -450,8 +450,24 @@ class PRCodeSuggestions: original_initial_line = None for file in self.diff_files: if file.filename.strip() == relevant_file: - if file.head_file: # in bitbucket, head_file is empty. toDo: fix this - original_initial_line = file.head_file.splitlines()[relevant_lines_start - 1] + if file.head_file: + file_lines = file.head_file.splitlines() + if relevant_lines_start > len(file_lines): + get_logger().warning( + "Could not dedent code snippet, because relevant_lines_start is out of range", + artifact={'filename': file.filename, + 'file_content': file.head_file, + 'relevant_lines_start': relevant_lines_start, + 'new_code_snippet': new_code_snippet}) + return new_code_snippet + else: + original_initial_line = file_lines[relevant_lines_start - 1] + else: + get_logger().warning("Could not dedent code snippet, because head_file is missing", + artifact={'filename': file.filename, + 'relevant_lines_start': relevant_lines_start, + 'new_code_snippet': new_code_snippet}) + return new_code_snippet break if original_initial_line: suggested_initial_line = new_code_snippet.splitlines()[0] @@ -461,7 +477,7 @@ class PRCodeSuggestions: if delta_spaces > 0: new_code_snippet = textwrap.indent(new_code_snippet, delta_spaces * " ").rstrip('\n') except Exception as e: - get_logger().error(f"Could not dedent code snippet for file {relevant_file}, error: {e}") + get_logger().error(f"Error when dedenting code snippet for file {relevant_file}, error: {e}") return new_code_snippet diff --git a/pr_agent/tools/pr_description.py b/pr_agent/tools/pr_description.py index 7ba35fa4..2214085a 100644 --- a/pr_agent/tools/pr_description.py +++ b/pr_agent/tools/pr_description.py @@ -510,6 +510,12 @@ extra_file_yaml = file_label_dict = {} for file in self.data['pr_files']: try: + required_fields = ['changes_summary', 'changes_title', 'filename', 'label'] + if not all(field in file for field in required_fields): + # can happen for example if a YAML generation was interrupted in the middle (no more tokens) + get_logger().warning(f"Missing required fields in file label dict {self.pr_id}, skipping file", + artifact={"file": file}) + continue filename = file['filename'].replace("'", "`").replace('"', '`') changes_summary = file['changes_summary'] changes_title = file['changes_title'].strip()