From 01ba6fe63d4a8ee7031be6b92b5cd9163b9b2b40 Mon Sep 17 00:00:00 2001 From: mrT23 Date: Tue, 10 Sep 2024 17:44:26 +0300 Subject: [PATCH 1/2] feat: enhance error handling and logging, update AI metadata terminology - Improved error handling and logging in `pr_processing.py` and `github_polling.py` to provide more detailed error information. - Updated AI metadata terminology from "AI-generated file summary" to "AI-generated changes summary" across multiple files for consistency. - Added a placeholder method `publish_file_comments` in `azuredevops_provider.py`. - Refined logging messages in `azuredevops_provider.py` for better clarity. --- docs/docs/core-abilities/metadata.md | 2 +- pr_agent/algo/pr_processing.py | 57 ++++++++++++------- .../git_providers/azuredevops_provider.py | 10 ++-- pr_agent/servers/github_polling.py | 22 ++++--- .../settings/pr_code_suggestions_prompts.toml | 8 +-- pr_agent/settings/pr_reviewer_prompts.toml | 4 +- 6 files changed, 62 insertions(+), 41 deletions(-) diff --git a/docs/docs/core-abilities/metadata.md b/docs/docs/core-abilities/metadata.md index d36c93e4..97b448a7 100644 --- a/docs/docs/core-abilities/metadata.md +++ b/docs/docs/core-abilities/metadata.md @@ -25,7 +25,7 @@ For example, when generating code suggestions for different files, PR-Agent can ``` ## File: 'src/file1.py' -### AI-generated file summary: +### AI-generated changes summary: - edited function `func1` that does X - Removed function `func2` that was not used - .... diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index fa3d217b..5b786de3 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -496,27 +496,42 @@ def add_ai_metadata_to_diff_files(git_provider, pr_description_files): """ Adds AI metadata to the diff files based on the PR description files (FilePatchInfo.ai_file_summary). """ - diff_files = git_provider.get_diff_files() - for file in diff_files: - filename = file.filename.strip() - found = False - for pr_file in pr_description_files: - if filename == pr_file['full_file_name'].strip(): - file.ai_file_summary = pr_file - found = True - break - if not found: - get_logger().info(f"File {filename} not found in the PR description files", - artifacts=pr_description_files) + try: + if not pr_description_files: + get_logger().warning(f"PR description files are empty.") + return + available_files = [pr_file['full_file_name'].strip() for pr_file in pr_description_files] + diff_files = git_provider.get_diff_files() + found_any_match = False + for file in diff_files: + filename = file.filename.strip() + index_file = available_files.index(filename) + if index_file == -1: + continue + file.ai_file_summary = pr_description_files[index_file] + found_any_match = True + if not found_any_match: + get_logger().error(f"Failed to find any matching files between PR description and diff files.", + artifact={"pr_description_files": pr_description_files}) + except Exception as e: + get_logger().error(f"Failed to add AI metadata to diff files: {e}", + artifact={"traceback": traceback.format_exc()}) def add_ai_summary_top_patch(file, full_extended_patch): - # below every instance of '## File: ...' in the patch, add the ai-summary metadata - full_extended_patch_lines = full_extended_patch.split("\n") - for i, line in enumerate(full_extended_patch_lines): - if line.startswith("## File:") or line.startswith("## file:"): - full_extended_patch_lines.insert(i + 1, - f"### AI-generated file summary:\n{file.ai_file_summary['long_summary']}") - break - full_extended_patch = "\n".join(full_extended_patch_lines) - return full_extended_patch \ No newline at end of file + try: + # below every instance of '## File: ...' in the patch, add the ai-summary metadata + full_extended_patch_lines = full_extended_patch.split("\n") + for i, line in enumerate(full_extended_patch_lines): + if line.startswith("## File:") or line.startswith("## file:"): + full_extended_patch_lines.insert(i + 1, + f"### AI-generated changes summary:\n{file.ai_file_summary['long_summary']}") + full_extended_patch = "\n".join(full_extended_patch_lines) + return full_extended_patch + + # if no '## File: ...' was found + return full_extended_patch + except Exception as e: + get_logger().error(f"Failed to add AI summary to the top of the patch: {e}", + artifact={"traceback": traceback.format_exc()}) + return full_extended_patch diff --git a/pr_agent/git_providers/azuredevops_provider.py b/pr_agent/git_providers/azuredevops_provider.py index f41b7aed..0a65d2f4 100644 --- a/pr_agent/git_providers/azuredevops_provider.py +++ b/pr_agent/git_providers/azuredevops_provider.py @@ -316,7 +316,7 @@ class AzureDevopsProvider(GitProvider): new_file_content_str = new_file_content_str.content except Exception as error: - get_logger().error(f"Failed to retrieve new file content of {file} at version {version}. Error: {str(error)}") + get_logger().error(f"Failed to retrieve new file content of {file} at version {version}", error=error) # get_logger().error( # "Failed to retrieve new file content of %s at version %s. Error: %s", # file, @@ -347,7 +347,7 @@ class AzureDevopsProvider(GitProvider): ) original_file_content_str = original_file_content_str.content except Exception as error: - get_logger().error(f"Failed to retrieve original file content of {file} at version {version}. Error: {str(error)}") + get_logger().error(f"Failed to retrieve original file content of {file} at version {version}", error=error) original_file_content_str = "" patch = load_large_diff( @@ -375,7 +375,7 @@ class AzureDevopsProvider(GitProvider): self.diff_files = diff_files return diff_files except Exception as e: - print(f"Error: {str(e)}") + get_logger().exception(f"Failed to get diff files, error: {e}") return [] def publish_comment(self, pr_comment: str, is_temporary: bool = False, thread_context=None): @@ -519,7 +519,6 @@ class AzureDevopsProvider(GitProvider): def get_user_id(self): return 0 - def get_issue_comments(self): threads = self.azure_devops_client.get_threads(repository_id=self.repo_slug, pull_request_id=self.pr_num, project=self.workspace_slug) threads.reverse() @@ -614,3 +613,6 @@ class AzureDevopsProvider(GitProvider): get_logger().error(f"Failed to get pr id, error: {e}") return "" + def publish_file_comments(self, file_comments: list) -> bool: + pass + diff --git a/pr_agent/servers/github_polling.py b/pr_agent/servers/github_polling.py index c33de212..8750f777 100644 --- a/pr_agent/servers/github_polling.py +++ b/pr_agent/servers/github_polling.py @@ -99,14 +99,14 @@ async def is_valid_notification(notification, headers, handled_ids, session, use if not comment_body: get_logger().debug(f"no comment_body") check_prev_comments = True - commenter_github_user = comment['user']['login'] \ - if 'user' in comment else '' - get_logger().info(f"Polling, pr_url: {pr_url}", - artifact={"comment": comment_body}) - user_tag = "@" + user_id - if user_tag not in comment_body: - get_logger().debug(f"user_tag not in comment_body") - check_prev_comments = True + else: + user_tag = "@" + user_id + if user_tag not in comment_body: + get_logger().debug(f"user_tag not in comment_body") + check_prev_comments = True + else: + get_logger().info(f"Polling, pr_url: {pr_url}", + artifact={"comment": comment_body}) if not check_prev_comments: return True, handled_ids, comment, comment_body, pr_url, user_tag @@ -125,6 +125,8 @@ async def is_valid_notification(notification, headers, handled_ids, session, use continue if user_tag in comment_body: get_logger().info("found user tag in previous comments") + get_logger().info(f"Polling, pr_url: {pr_url}", + artifact={"comment": comment_body}) return True, handled_ids, comment, comment_body, pr_url, user_tag get_logger().error(f"Failed to fetch comments for PR: {pr_url}") @@ -188,6 +190,8 @@ async def polling_loop(): get_logger().info(f"Received {len(notifications)} notifications") task_queue = deque() for notification in notifications: + if not notification: + continue # mark notification as read await mark_notification_as_read(headers, notification, session) @@ -204,7 +208,7 @@ async def polling_loop(): task_queue.append((process_comment_sync, (pr_url, rest_of_comment, comment_id))) get_logger().info(f"Queued comment processing for PR: {pr_url}") else: - get_logger().debug(f"Skipping comment processing for PR: {pr_url}") + get_logger().debug(f"Skipping comment processing for PR") max_allowed_parallel_tasks = 10 if task_queue: diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml index 00439757..37d6f76c 100644 --- a/pr_agent/settings/pr_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_code_suggestions_prompts.toml @@ -7,7 +7,7 @@ The format we will use to present the PR code diff: ====== ## File: 'src/file1.py' {%- if is_ai_metadata %} -### AI-generated file summary: +### AI-generated changes summary: * ... * ... {%- endif %} @@ -39,7 +39,7 @@ __old hunk__ - We also added line numbers for the '__new hunk__' code, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and should only used for reference. - Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \ {%- if is_ai_metadata %} -- If available, an AI-generated summary will appear and provide a high-level overview of the file changes. +- If available, an AI-generated summary will appear and provide a high-level overview of the file changes. Note that this summary may not be fully accurate or complete. {%- endif %} Specific instructions for generating code suggestions: @@ -131,7 +131,7 @@ The format we will use to present the PR code diff: ====== ## File: 'src/file1.py' {%- if is_ai_metadata %} -### AI-generated file summary: +### AI-generated changes summary: * ... * ... {%- endif %} @@ -163,7 +163,7 @@ __old hunk__ - We also added line numbers for the '__new hunk__' code, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and should only used for reference. - Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \ {%- if is_ai_metadata %} -- If available, an AI-generated summary will appear and provide a high-level overview of the file changes. +- If available, an AI-generated summary will appear and provide a high-level overview of the file changes. Note that this summary may not be fully accurate or complete. {%- endif %} Specific instructions for generating code suggestions: diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml index c880130e..8dc96dc7 100644 --- a/pr_agent/settings/pr_reviewer_prompts.toml +++ b/pr_agent/settings/pr_reviewer_prompts.toml @@ -12,7 +12,7 @@ The format we will use to present the PR code diff: ====== ## File: 'src/file1.py' {%- if is_ai_metadata %} -### AI-generated file summary: +### AI-generated changes summary: * ... * ... {%- endif %} @@ -46,7 +46,7 @@ __old hunk__ - Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \ The review should address new code added in the PR code diff (lines starting with '+') {%- if is_ai_metadata %} -- If available, an AI-generated summary will appear and provide a high-level overview of the file changes. +- If available, an AI-generated summary will appear and provide a high-level overview of the file changes. Note that this summary may not be fully accurate or complete. {%- endif %} - When quoting variables or names from the code, use backticks (`) instead of single quote ('). From 1451d82d6b75ba5a1ad1a414415c7a1d0564279b Mon Sep 17 00:00:00 2001 From: Tal Date: Tue, 10 Sep 2024 17:50:32 +0300 Subject: [PATCH 2/2] Update pr_agent/algo/pr_processing.py Co-authored-by: codiumai-pr-agent-pro[bot] <151058649+codiumai-pr-agent-pro[bot]@users.noreply.github.com> --- pr_agent/algo/pr_processing.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pr_agent/algo/pr_processing.py b/pr_agent/algo/pr_processing.py index 5b786de3..f4d9269d 100644 --- a/pr_agent/algo/pr_processing.py +++ b/pr_agent/algo/pr_processing.py @@ -500,16 +500,14 @@ def add_ai_metadata_to_diff_files(git_provider, pr_description_files): if not pr_description_files: get_logger().warning(f"PR description files are empty.") return - available_files = [pr_file['full_file_name'].strip() for pr_file in pr_description_files] + available_files = {pr_file['full_file_name'].strip(): pr_file for pr_file in pr_description_files} diff_files = git_provider.get_diff_files() found_any_match = False for file in diff_files: filename = file.filename.strip() - index_file = available_files.index(filename) - if index_file == -1: - continue - file.ai_file_summary = pr_description_files[index_file] - found_any_match = True + if filename in available_files: + file.ai_file_summary = available_files[filename] + found_any_match = True if not found_any_match: get_logger().error(f"Failed to find any matching files between PR description and diff files.", artifact={"pr_description_files": pr_description_files})