From b53d2773a9eb17c343888b339842fa601ba43d47 Mon Sep 17 00:00:00 2001 From: "benedict.lee" Date: Wed, 9 Apr 2025 23:45:04 +0900 Subject: [PATCH 01/12] improve ask_line tool(add conversation history context) --- pr_agent/git_providers/git_provider.py | 9 ++ pr_agent/git_providers/github_provider.py | 124 ++++++++++++++++++ pr_agent/settings/configuration.toml | 1 + .../settings/pr_line_questions_prompts.toml | 9 ++ pr_agent/tools/pr_line_questions.py | 75 +++++++++++ 5 files changed, 218 insertions(+) diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py index 0228955e..9f469e56 100644 --- a/pr_agent/git_providers/git_provider.py +++ b/pr_agent/git_providers/git_provider.py @@ -285,6 +285,15 @@ class GitProvider(ABC): def get_comment_url(self, comment) -> str: return "" + + def get_review_comment_by_id(self, comment_id: int): + pass + + def get_review_id_by_comment_id(self, comment_id: int): + pass + + def get_review_thread_comments(self, comment_id: int): + pass #### labels operations #### @abstractmethod diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index e782f9cf..8f021fb1 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -428,6 +428,130 @@ class GithubProvider(GitProvider): except Exception as e: get_logger().error(f"Failed to publish inline code comments fallback, error: {e}") raise e + + def get_review_comment_by_id(self, comment_id: int): + """ + Retrieves a review comment by its ID. + + Args: + comment_id: Review comment ID + + Returns: + Review comment object or None (if not found) + """ + try: + # Using PyGitHub library + # There's no direct way to get PR comment by ID, so we fetch all comments and filter + all_comments = list(self.pr.get_comments()) + for comment in all_comments: + if comment.id == comment_id: + return comment + return None + except Exception as e: + get_logger().warning(f"Failed to get review comment {comment_id}, error: {e}") + return None + + def get_review_id_by_comment_id(self, comment_id: int): + """ + Finds the review ID that a comment belongs to based on its comment ID. + + Args: + comment_id: Review comment ID + + Returns: + Review ID or None (if not found) + """ + try: + comment = self.get_review_comment_by_id(comment_id) + if comment: + return getattr(comment, 'pull_request_review_id', None) + return None + except Exception as e: + get_logger().warning(f"Failed to get review ID for comment {comment_id}, error: {e}") + return None + + def get_review_thread_comments(self, comment_id: int): + """ + Retrieves all comments in the thread that a specific comment belongs to. + + Args: + comment_id: Review comment ID + + Returns: + List of comments in the same thread + """ + try: + # Get comment information + comment = self.get_review_comment_by_id(comment_id) + if not comment: + return [] + + # get all comments + all_comments = list(self.pr.get_comments()) + + # Filter comments in the same thread + thread_comments = [] + in_reply_to_map = {} + + # First build the in_reply_to relationship map + for c in all_comments: + in_reply_to_id = getattr(c, 'in_reply_to_id', None) + if in_reply_to_id: + in_reply_to_map[c.id] = in_reply_to_id + + # Recursively find all ancestor comments (collect comment's ancestors) + def find_ancestors(cid): + ancestors = [] + current = cid + while current in in_reply_to_map: + parent_id = in_reply_to_map[current] + ancestors.append(parent_id) + current = parent_id + return ancestors + + # Recursively find all descendant comments (collect all replies to the comment) + def find_descendants(cid): + descendants = [] + for c in all_comments: + if getattr(c, 'in_reply_to_id', None) == cid: + descendants.append(c.id) + descendants.extend(find_descendants(c.id)) + return descendants + + # Find all descendants of a specific ancestor (including sibling comments) + def find_all_descendants_of_ancestor(ancestor_id): + all_descendants = [] + for c in all_comments: + if getattr(c, 'in_reply_to_id', None) == ancestor_id: + all_descendants.append(c.id) + all_descendants.extend(find_descendants(c.id)) + return all_descendants + + # Collect both ancestor and descendant IDs of the comment + ancestors = find_ancestors(comment_id) + descendants = find_descendants(comment_id) + + # Create thread ID set (self, ancestors, descendants) + thread_ids = set([comment_id] + ancestors + descendants) + + # For each ancestor, include all conversation branches (sibling comments with the same ancestor) + for ancestor_id in ancestors: + sibling_ids = find_all_descendants_of_ancestor(ancestor_id) + thread_ids.update(sibling_ids) + + # Filter to only get comments belonging to the thread + for c in all_comments: + if c.id in thread_ids: + thread_comments.append(c) + + # Sort chronologically (by creation date) + thread_comments.sort(key=lambda c: c.created_at) + + return thread_comments + + except Exception as e: + get_logger().warning(f"Failed to get review thread comments for comment {comment_id}, error: {e}") + return [] def _publish_inline_comments_fallback_with_verification(self, comments: list[dict]): """ diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 1a58cc0b..84893cdd 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -119,6 +119,7 @@ async_ai_calls=true [pr_questions] # /ask # enable_help_text=false +use_conversation_history=true [pr_code_suggestions] # /improve # diff --git a/pr_agent/settings/pr_line_questions_prompts.toml b/pr_agent/settings/pr_line_questions_prompts.toml index 2d32223d..3f1e7970 100644 --- a/pr_agent/settings/pr_line_questions_prompts.toml +++ b/pr_agent/settings/pr_line_questions_prompts.toml @@ -43,6 +43,15 @@ Now focus on the selected lines from the hunk: ====== Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines +{%- if conversation_history %} + +Previous discussion on this code: +====== +{{ conversation_history|trim }} +====== +Consider both the previous review comments from authors and reviewers, as well as any previous questions and answers about this code. The "Previous Question" and "Previous AI Answer" show earlier interactions about the same code. Use this context to provide a more informed and consistent answer. +{%- endif %} + A question about the selected lines: ====== diff --git a/pr_agent/tools/pr_line_questions.py b/pr_agent/tools/pr_line_questions.py index a122c534..77c41079 100644 --- a/pr_agent/tools/pr_line_questions.py +++ b/pr_agent/tools/pr_line_questions.py @@ -35,6 +35,7 @@ class PR_LineQuestions: "question": self.question_str, "full_hunk": "", "selected_lines": "", + "conversation_history": "", } self.token_handler = TokenHandler(self.git_provider.pr, self.vars, @@ -42,6 +43,9 @@ class PR_LineQuestions: get_settings().pr_line_questions_prompt.user) self.patches_diff = None self.prediction = None + + # get settings for use conversation history + self.use_conversation_history = get_settings().pr_questions.use_conversation_history def parse_args(self, args): if args and len(args) > 0: @@ -56,6 +60,10 @@ class PR_LineQuestions: # if get_settings().config.publish_output: # self.git_provider.publish_comment("Preparing answer...", is_temporary=True) + # set conversation history if enabled + if self.use_conversation_history: + self._load_conversation_history() + self.patch_with_lines = "" ask_diff = get_settings().get('ask_diff_hunk', "") line_start = get_settings().get('line_start', '') @@ -92,6 +100,73 @@ class PR_LineQuestions: self.git_provider.publish_comment(model_answer_sanitized) return "" + + def _load_conversation_history(self): + """generate conversation history from the code review thread""" + try: + comment_id = get_settings().get('comment_id', '') + file_path = get_settings().get('file_name', '') + line_number = get_settings().get('line_end', '') + + # return if no comment id or file path and line number + if not (comment_id or (file_path and line_number)): + return + + # initialize conversation history + conversation_history = [] + + if hasattr(self.git_provider, 'get_review_thread_comments') and comment_id: + try: + # get review thread comments + thread_comments = self.git_provider.get_review_thread_comments(comment_id) + + # current question id (this question is excluded from the context) + current_question_id = comment_id + + # generate conversation history from the comments + for comment in thread_comments: + # skip empty comments + body = getattr(comment, 'body', '') + if not body or not body.strip(): + continue + + # except for current question + if current_question_id and str(comment.id) == str(current_question_id): + continue + + # remove the AI command (/ask etc) from the beginning of the comment (optional) + clean_body = body + if clean_body.startswith('/'): + clean_body = clean_body.split('\n', 1)[-1] if '\n' in clean_body else '' + + if not clean_body.strip(): + continue + + # author info + user = comment.user + author = user.login if hasattr(user, 'login') else 'Unknown' + + # confirm if the author is the current user (AI vs user) + is_ai = 'bot' in author.lower() or '[bot]' in author.lower() + role = 'AI' if is_ai else 'User' + + # append to the conversation history + conversation_history.append(f"{role} ({author}): {clean_body}") + + # transform the conversation history to a string + if conversation_history: + self.vars["conversation_history"] = "\n\n".join(conversation_history) + get_logger().info(f"Loaded {len(conversation_history)} comments from the code review thread") + else: + self.vars["conversation_history"] = "" + + except Exception as e: + get_logger().warning(f"Failed to get review thread comments: {e}") + self.vars["conversation_history"] = "" + + except Exception as e: + get_logger().error(f"Error loading conversation history: {e}") + self.vars["conversation_history"] = "" async def _get_prediction(self, model: str): variables = copy.deepcopy(self.vars) From 8952459f6d3328dd423be64d24b155cffdfb0b45 Mon Sep 17 00:00:00 2001 From: Benedict Lee Date: Thu, 10 Apr 2025 08:48:59 +0900 Subject: [PATCH 02/12] Update pr_agent/tools/pr_line_questions.py Co-authored-by: Prateek <110811408+Prateikx@users.noreply.github.com> --- pr_agent/tools/pr_line_questions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pr_agent/tools/pr_line_questions.py b/pr_agent/tools/pr_line_questions.py index 77c41079..aa8dfed1 100644 --- a/pr_agent/tools/pr_line_questions.py +++ b/pr_agent/tools/pr_line_questions.py @@ -131,8 +131,8 @@ class PR_LineQuestions: continue # except for current question - if current_question_id and str(comment.id) == str(current_question_id): - continue + # except for current question + if current_question_id and comment.id == current_question_id: # remove the AI command (/ask etc) from the beginning of the comment (optional) clean_body = body From 6bf093a6a12eccc7df73be9226d34ee886a53a6b Mon Sep 17 00:00:00 2001 From: Benedict Lee Date: Thu, 10 Apr 2025 19:41:43 +0900 Subject: [PATCH 03/12] refactor: Add GitHub provider check for conversation history Co-authored-by: ofir-frd <85901822+ofir-frd@users.noreply.github.com> --- pr_agent/tools/pr_line_questions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pr_agent/tools/pr_line_questions.py b/pr_agent/tools/pr_line_questions.py index aa8dfed1..0157a57b 100644 --- a/pr_agent/tools/pr_line_questions.py +++ b/pr_agent/tools/pr_line_questions.py @@ -61,7 +61,8 @@ class PR_LineQuestions: # self.git_provider.publish_comment("Preparing answer...", is_temporary=True) # set conversation history if enabled - if self.use_conversation_history: + # currently only supports GitHub provider + if self.use_conversation_history and isinstance(self.git_provider, GithubProvider): self._load_conversation_history() self.patch_with_lines = "" From c5165d917bdf9fbb338816e65239cb83a1b174e1 Mon Sep 17 00:00:00 2001 From: Benedict Lee Date: Thu, 10 Apr 2025 19:59:34 +0900 Subject: [PATCH 04/12] refactor: Validate all required parameters before proceeding Co-authored-by: ofir-frd <85901822+ofir-frd@users.noreply.github.com> --- pr_agent/tools/pr_line_questions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pr_agent/tools/pr_line_questions.py b/pr_agent/tools/pr_line_questions.py index 0157a57b..a8f310d9 100644 --- a/pr_agent/tools/pr_line_questions.py +++ b/pr_agent/tools/pr_line_questions.py @@ -109,8 +109,8 @@ class PR_LineQuestions: file_path = get_settings().get('file_name', '') line_number = get_settings().get('line_end', '') - # return if no comment id or file path and line number - if not (comment_id or (file_path and line_number)): + # return if any required parameter is missing + if not all([comment_id, file_path, line_number]): return # initialize conversation history From 9c06b6b26610e23bb601c06bfc049ee7d8ee8b74 Mon Sep 17 00:00:00 2001 From: "benedict.lee" Date: Thu, 10 Apr 2025 21:56:37 +0900 Subject: [PATCH 05/12] Apply PR review feedback: Code style and functionality improvements --- pr_agent/git_providers/git_provider.py | 10 +- pr_agent/git_providers/github_provider.py | 124 +++--------------- .../settings/pr_line_questions_prompts.toml | 3 +- pr_agent/tools/pr_line_questions.py | 94 +++++-------- 4 files changed, 53 insertions(+), 178 deletions(-) diff --git a/pr_agent/git_providers/git_provider.py b/pr_agent/git_providers/git_provider.py index 9f469e56..127fa41e 100644 --- a/pr_agent/git_providers/git_provider.py +++ b/pr_agent/git_providers/git_provider.py @@ -285,14 +285,8 @@ class GitProvider(ABC): def get_comment_url(self, comment) -> str: return "" - - def get_review_comment_by_id(self, comment_id: int): - pass - - def get_review_id_by_comment_id(self, comment_id: int): - pass - - def get_review_thread_comments(self, comment_id: int): + + def get_review_thread_comments(self, comment_id: int) -> list[dict]: pass #### labels operations #### diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 8f021fb1..80581c61 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -427,130 +427,44 @@ class GithubProvider(GitProvider): self._publish_inline_comments_fallback_with_verification(comments) except Exception as e: get_logger().error(f"Failed to publish inline code comments fallback, error: {e}") - raise e - - def get_review_comment_by_id(self, comment_id: int): - """ - Retrieves a review comment by its ID. - - Args: - comment_id: Review comment ID - - Returns: - Review comment object or None (if not found) - """ - try: - # Using PyGitHub library - # There's no direct way to get PR comment by ID, so we fetch all comments and filter - all_comments = list(self.pr.get_comments()) - for comment in all_comments: - if comment.id == comment_id: - return comment - return None - except Exception as e: - get_logger().warning(f"Failed to get review comment {comment_id}, error: {e}") - return None - - def get_review_id_by_comment_id(self, comment_id: int): - """ - Finds the review ID that a comment belongs to based on its comment ID. - - Args: - comment_id: Review comment ID - - Returns: - Review ID or None (if not found) - """ - try: - comment = self.get_review_comment_by_id(comment_id) - if comment: - return getattr(comment, 'pull_request_review_id', None) - return None - except Exception as e: - get_logger().warning(f"Failed to get review ID for comment {comment_id}, error: {e}") - return None + raise e - def get_review_thread_comments(self, comment_id: int): + def get_review_thread_comments(self, comment_id: int) -> list[dict]: """ - Retrieves all comments in the thread that a specific comment belongs to. + Retrieves all comments in the same line as the given comment. Args: comment_id: Review comment ID - + Returns: - List of comments in the same thread + List of comments on the same line """ try: - # Get comment information - comment = self.get_review_comment_by_id(comment_id) + # Get the original comment to find its location + comment = self.pr.get_comment(comment_id) if not comment: return [] + + # Extract file path and line number + file_path = comment.path + line_number = comment.raw_data["line"] if "line" in comment.raw_data else comment.raw_data.get("original_line") - # get all comments + # Get all comments all_comments = list(self.pr.get_comments()) - # Filter comments in the same thread - thread_comments = [] - in_reply_to_map = {} + # Filter comments on the same line of the same file + thread_comments = [ + c for c in all_comments + if c.path == file_path and (c.raw_data.get("line") == line_number or c.raw_data.get("original_line") == line_number) + ] - # First build the in_reply_to relationship map - for c in all_comments: - in_reply_to_id = getattr(c, 'in_reply_to_id', None) - if in_reply_to_id: - in_reply_to_map[c.id] = in_reply_to_id - - # Recursively find all ancestor comments (collect comment's ancestors) - def find_ancestors(cid): - ancestors = [] - current = cid - while current in in_reply_to_map: - parent_id = in_reply_to_map[current] - ancestors.append(parent_id) - current = parent_id - return ancestors - - # Recursively find all descendant comments (collect all replies to the comment) - def find_descendants(cid): - descendants = [] - for c in all_comments: - if getattr(c, 'in_reply_to_id', None) == cid: - descendants.append(c.id) - descendants.extend(find_descendants(c.id)) - return descendants - - # Find all descendants of a specific ancestor (including sibling comments) - def find_all_descendants_of_ancestor(ancestor_id): - all_descendants = [] - for c in all_comments: - if getattr(c, 'in_reply_to_id', None) == ancestor_id: - all_descendants.append(c.id) - all_descendants.extend(find_descendants(c.id)) - return all_descendants - - # Collect both ancestor and descendant IDs of the comment - ancestors = find_ancestors(comment_id) - descendants = find_descendants(comment_id) - - # Create thread ID set (self, ancestors, descendants) - thread_ids = set([comment_id] + ancestors + descendants) - - # For each ancestor, include all conversation branches (sibling comments with the same ancestor) - for ancestor_id in ancestors: - sibling_ids = find_all_descendants_of_ancestor(ancestor_id) - thread_ids.update(sibling_ids) - - # Filter to only get comments belonging to the thread - for c in all_comments: - if c.id in thread_ids: - thread_comments.append(c) - - # Sort chronologically (by creation date) + # Sort chronologically thread_comments.sort(key=lambda c: c.created_at) return thread_comments except Exception as e: - get_logger().warning(f"Failed to get review thread comments for comment {comment_id}, error: {e}") + get_logger().warning(f"Failed to get review comments for comment {comment_id}, error: {e}") return [] def _publish_inline_comments_fallback_with_verification(self, comments: list[dict]): diff --git a/pr_agent/settings/pr_line_questions_prompts.toml b/pr_agent/settings/pr_line_questions_prompts.toml index 3f1e7970..853a7975 100644 --- a/pr_agent/settings/pr_line_questions_prompts.toml +++ b/pr_agent/settings/pr_line_questions_prompts.toml @@ -49,10 +49,9 @@ Previous discussion on this code: ====== {{ conversation_history|trim }} ====== -Consider both the previous review comments from authors and reviewers, as well as any previous questions and answers about this code. The "Previous Question" and "Previous AI Answer" show earlier interactions about the same code. Use this context to provide a more informed and consistent answer. +Use this prior discussion context to provide a consistent and informed answer. {%- endif %} - A question about the selected lines: ====== {{ question|trim }} diff --git a/pr_agent/tools/pr_line_questions.py b/pr_agent/tools/pr_line_questions.py index a8f310d9..2a97f9f5 100644 --- a/pr_agent/tools/pr_line_questions.py +++ b/pr_agent/tools/pr_line_questions.py @@ -16,7 +16,7 @@ from pr_agent.git_providers import get_git_provider from pr_agent.git_providers.git_provider import get_main_pr_language from pr_agent.log import get_logger from pr_agent.servers.help import HelpMessage - +from pr_agent.git_providers.github_provider import GithubProvider class PR_LineQuestions: def __init__(self, pr_url: str, args=None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler): @@ -43,9 +43,6 @@ class PR_LineQuestions: get_settings().pr_line_questions_prompt.user) self.patches_diff = None self.prediction = None - - # get settings for use conversation history - self.use_conversation_history = get_settings().pr_questions.use_conversation_history def parse_args(self, args): if args and len(args) > 0: @@ -62,7 +59,7 @@ class PR_LineQuestions: # set conversation history if enabled # currently only supports GitHub provider - if self.use_conversation_history and isinstance(self.git_provider, GithubProvider): + if get_settings().pr_questions.use_conversation_history and isinstance(self.git_provider, GithubProvider): self._load_conversation_history() self.patch_with_lines = "" @@ -104,70 +101,41 @@ class PR_LineQuestions: def _load_conversation_history(self): """generate conversation history from the code review thread""" + # set conversation history to empty string + self.vars["conversation_history"] = "" + + comment_id = get_settings().get('comment_id', '') + file_path = get_settings().get('file_name', '') + line_number = get_settings().get('line_end', '') + + # early return if any required parameter is missing + if not all([comment_id, file_path, line_number]): + return + try: - comment_id = get_settings().get('comment_id', '') - file_path = get_settings().get('file_name', '') - line_number = get_settings().get('line_end', '') + # retrieve thread comments + thread_comments = self.git_provider.get_review_thread_comments(comment_id) - # return if any required parameter is missing - if not all([comment_id, file_path, line_number]): - return - - # initialize conversation history + # generate conversation history conversation_history = [] + for comment in thread_comments: + body = getattr(comment, 'body', '') + + # skip empty comments, current comment(will be added as a question at prompt) + if not body or not body.strip() or comment_id == comment.id: + continue + + user = comment.user + author = user.login if hasattr(user, 'login') else 'Unknown' + conversation_history.append(f"{author}: {body}") - if hasattr(self.git_provider, 'get_review_thread_comments') and comment_id: - try: - # get review thread comments - thread_comments = self.git_provider.get_review_thread_comments(comment_id) - - # current question id (this question is excluded from the context) - current_question_id = comment_id - - # generate conversation history from the comments - for comment in thread_comments: - # skip empty comments - body = getattr(comment, 'body', '') - if not body or not body.strip(): - continue - - # except for current question - # except for current question - if current_question_id and comment.id == current_question_id: - - # remove the AI command (/ask etc) from the beginning of the comment (optional) - clean_body = body - if clean_body.startswith('/'): - clean_body = clean_body.split('\n', 1)[-1] if '\n' in clean_body else '' - - if not clean_body.strip(): - continue - - # author info - user = comment.user - author = user.login if hasattr(user, 'login') else 'Unknown' - - # confirm if the author is the current user (AI vs user) - is_ai = 'bot' in author.lower() or '[bot]' in author.lower() - role = 'AI' if is_ai else 'User' - - # append to the conversation history - conversation_history.append(f"{role} ({author}): {clean_body}") - - # transform the conversation history to a string - if conversation_history: - self.vars["conversation_history"] = "\n\n".join(conversation_history) - get_logger().info(f"Loaded {len(conversation_history)} comments from the code review thread") - else: - self.vars["conversation_history"] = "" - - except Exception as e: - get_logger().warning(f"Failed to get review thread comments: {e}") - self.vars["conversation_history"] = "" + # transform and save conversation history + if conversation_history: + self.vars["conversation_history"] = "\n\n".join(conversation_history) + get_logger().info(f"Loaded {len(conversation_history)} comments from the code review thread") except Exception as e: - get_logger().error(f"Error loading conversation history: {e}") - self.vars["conversation_history"] = "" + get_logger().error(f"Error processing conversation history, error: {e}") async def _get_prediction(self, model: str): variables = copy.deepcopy(self.vars) From a434d0af9bb229531d067f96e6d6dd78d489196e Mon Sep 17 00:00:00 2001 From: "benedict.lee" Date: Mon, 21 Apr 2025 16:28:42 +0900 Subject: [PATCH 06/12] Improve comment thread retrieval by using in_reply_to_id instead of line numbers --- pr_agent/git_providers/github_provider.py | 38 +++++++++++++---------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 80581c61..2e3a8bf0 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -431,36 +431,40 @@ class GithubProvider(GitProvider): def get_review_thread_comments(self, comment_id: int) -> list[dict]: """ - Retrieves all comments in the same line as the given comment. + Retrieves all comments in the same thread as the given comment. Args: comment_id: Review comment ID Returns: - List of comments on the same line + List of comments in the same thread """ try: - # Get the original comment to find its location - comment = self.pr.get_comment(comment_id) - if not comment: - return [] - - # Extract file path and line number - file_path = comment.path - line_number = comment.raw_data["line"] if "line" in comment.raw_data else comment.raw_data.get("original_line") - - # Get all comments + # Fetch all comments with a single API call all_comments = list(self.pr.get_comments()) - # Filter comments on the same line of the same file + # Find the target comment by ID + target_comment = next((c for c in all_comments if c.id == comment_id), None) + if not target_comment: + return [] + + # First, identify if this is a reply to another comment + root_comment_id = target_comment.raw_data.get("in_reply_to_id", target_comment.id) + if root_comment_id != target_comment.id: + # If this is a reply, find the root comment + root_comment = next((c for c in all_comments if c.id == root_comment_id), None) + if root_comment: + target_comment = root_comment + + # Build the thread - include the root comment and all replies to it thread_comments = [ - c for c in all_comments - if c.path == file_path and (c.raw_data.get("line") == line_number or c.raw_data.get("original_line") == line_number) + c for c in all_comments if + c.id == target_comment.id or c.raw_data.get("in_reply_to_id") == target_comment.id ] - + # Sort chronologically thread_comments.sort(key=lambda c: c.created_at) - + return thread_comments except Exception as e: From e11c2e1c7f7a001e0e2f94fa46aa6437d57a2b33 Mon Sep 17 00:00:00 2001 From: "benedict.lee" Date: Mon, 21 Apr 2025 16:30:27 +0900 Subject: [PATCH 07/12] Reorganize imports according to Python conventions --- pr_agent/tools/pr_line_questions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_agent/tools/pr_line_questions.py b/pr_agent/tools/pr_line_questions.py index 2a97f9f5..073e03eb 100644 --- a/pr_agent/tools/pr_line_questions.py +++ b/pr_agent/tools/pr_line_questions.py @@ -14,9 +14,9 @@ from pr_agent.algo.utils import ModelType from pr_agent.config_loader import get_settings from pr_agent.git_providers import get_git_provider from pr_agent.git_providers.git_provider import get_main_pr_language +from pr_agent.git_providers.github_provider import GithubProvider from pr_agent.log import get_logger from pr_agent.servers.help import HelpMessage -from pr_agent.git_providers.github_provider import GithubProvider class PR_LineQuestions: def __init__(self, pr_url: str, args=None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler): From 8b4bf49f1cc9ca97dc01782af4636e9ec40d0aac Mon Sep 17 00:00:00 2001 From: "benedict.lee" Date: Mon, 21 Apr 2025 16:50:37 +0900 Subject: [PATCH 08/12] Improve conversation history handling and prompts for line questions --- .../settings/pr_line_questions_prompts.toml | 7 +++++- pr_agent/tools/pr_line_questions.py | 22 ++++++++++++------- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/pr_agent/settings/pr_line_questions_prompts.toml b/pr_agent/settings/pr_line_questions_prompts.toml index 853a7975..24608313 100644 --- a/pr_agent/settings/pr_line_questions_prompts.toml +++ b/pr_agent/settings/pr_line_questions_prompts.toml @@ -49,7 +49,12 @@ Previous discussion on this code: ====== {{ conversation_history|trim }} ====== -Use this prior discussion context to provide a consistent and informed answer. + +Consider this conversation history (format: "Username: Message"). When responding: +- Maintain consistency with previous technical explanations +- Address unresolved issues from earlier discussions +- Build upon existing knowledge without contradictions +- Incorporate relevant context while focusing on the current question {%- endif %} A question about the selected lines: diff --git a/pr_agent/tools/pr_line_questions.py b/pr_agent/tools/pr_line_questions.py index 073e03eb..c5cc234c 100644 --- a/pr_agent/tools/pr_line_questions.py +++ b/pr_agent/tools/pr_line_questions.py @@ -60,7 +60,8 @@ class PR_LineQuestions: # set conversation history if enabled # currently only supports GitHub provider if get_settings().pr_questions.use_conversation_history and isinstance(self.git_provider, GithubProvider): - self._load_conversation_history() + conversation_history = self._load_conversation_history() + self.vars["conversation_history"] = conversation_history self.patch_with_lines = "" ask_diff = get_settings().get('ask_diff_hunk', "") @@ -99,18 +100,20 @@ class PR_LineQuestions: return "" - def _load_conversation_history(self): - """generate conversation history from the code review thread""" - # set conversation history to empty string - self.vars["conversation_history"] = "" + def _load_conversation_history(self) -> str: + """Generate conversation history from the code review thread + Returns: + str: The formatted conversation history + """ comment_id = get_settings().get('comment_id', '') file_path = get_settings().get('file_name', '') line_number = get_settings().get('line_end', '') # early return if any required parameter is missing if not all([comment_id, file_path, line_number]): - return + get_logger().error("Missing required parameters for conversation history") + return "" try: # retrieve thread comments @@ -129,13 +132,16 @@ class PR_LineQuestions: author = user.login if hasattr(user, 'login') else 'Unknown' conversation_history.append(f"{author}: {body}") - # transform and save conversation history + # transform conversation history to string if conversation_history: - self.vars["conversation_history"] = "\n\n".join(conversation_history) get_logger().info(f"Loaded {len(conversation_history)} comments from the code review thread") + return "\n\n".join(conversation_history) + + return "" except Exception as e: get_logger().error(f"Error processing conversation history, error: {e}") + return "" async def _get_prediction(self, model: str): variables = copy.deepcopy(self.vars) From 9906ec3687dbfc2666731ab45c5308059f428a68 Mon Sep 17 00:00:00 2001 From: "benedict.lee" Date: Mon, 21 Apr 2025 17:14:36 +0900 Subject: [PATCH 09/12] Improve conversation history formatting with numbered comments --- .../settings/pr_line_questions_prompts.toml | 2 +- pr_agent/tools/pr_line_questions.py | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/pr_agent/settings/pr_line_questions_prompts.toml b/pr_agent/settings/pr_line_questions_prompts.toml index 24608313..93cdab86 100644 --- a/pr_agent/settings/pr_line_questions_prompts.toml +++ b/pr_agent/settings/pr_line_questions_prompts.toml @@ -50,7 +50,7 @@ Previous discussion on this code: {{ conversation_history|trim }} ====== -Consider this conversation history (format: "Username: Message"). When responding: +Consider this conversation history (format: "N. Username: Message", where numbers indicate the comment order). When responding: - Maintain consistency with previous technical explanations - Address unresolved issues from earlier discussions - Build upon existing knowledge without contradictions diff --git a/pr_agent/tools/pr_line_questions.py b/pr_agent/tools/pr_line_questions.py index c5cc234c..f373a4a1 100644 --- a/pr_agent/tools/pr_line_questions.py +++ b/pr_agent/tools/pr_line_questions.py @@ -119,8 +119,8 @@ class PR_LineQuestions: # retrieve thread comments thread_comments = self.git_provider.get_review_thread_comments(comment_id) - # generate conversation history - conversation_history = [] + # filter and prepare comments + filtered_comments = [] for comment in thread_comments: body = getattr(comment, 'body', '') @@ -130,12 +130,17 @@ class PR_LineQuestions: user = comment.user author = user.login if hasattr(user, 'login') else 'Unknown' - conversation_history.append(f"{author}: {body}") + filtered_comments.append((author, body)) - # transform conversation history to string - if conversation_history: - get_logger().info(f"Loaded {len(conversation_history)} comments from the code review thread") - return "\n\n".join(conversation_history) + # transform conversation history to string using the same pattern as get_commit_messages + if filtered_comments: + comment_count = len(filtered_comments) + get_logger().info(f"Loaded {comment_count} comments from the code review thread") + + # Format as numbered list, similar to get_commit_messages + conversation_history_str = "\n".join([f"{i + 1}. {author}: {body}" + for i, (author, body) in enumerate(filtered_comments)]) + return conversation_history_str return "" From 29d4fe510e930fd04e6feeec546f4189da06420d Mon Sep 17 00:00:00 2001 From: Benedict Lee Date: Thu, 24 Apr 2025 11:21:49 +0900 Subject: [PATCH 10/12] Improve get_review_thread_comments method implementation Co-authored-by: ofir-frd <85901822+ofir-frd@users.noreply.github.com> --- pr_agent/git_providers/github_provider.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index 2e3a8bf0..daa0c84b 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -448,19 +448,13 @@ class GithubProvider(GitProvider): if not target_comment: return [] - # First, identify if this is a reply to another comment + # Get root comment id root_comment_id = target_comment.raw_data.get("in_reply_to_id", target_comment.id) - if root_comment_id != target_comment.id: - # If this is a reply, find the root comment - root_comment = next((c for c in all_comments if c.id == root_comment_id), None) - if root_comment: - target_comment = root_comment - # Build the thread - include the root comment and all replies to it thread_comments = [ - c for c in all_comments if - c.id == target_comment.id or c.raw_data.get("in_reply_to_id") == target_comment.id - ] + c for c in all_comments if + c.id == root_comment_id or c.raw_data.get("in_reply_to_id") == root_comment_id +] # Sort chronologically thread_comments.sort(key=lambda c: c.created_at) From ddb94ec9b40d3cefaf51c40a0b38f4c0d6428483 Mon Sep 17 00:00:00 2001 From: Benedict Lee Date: Thu, 24 Apr 2025 11:22:43 +0900 Subject: [PATCH 11/12] mprove get_review_thread_comments method implementation Co-authored-by: ofir-frd <85901822+ofir-frd@users.noreply.github.com> --- pr_agent/git_providers/github_provider.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index daa0c84b..a526f594 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -456,8 +456,6 @@ class GithubProvider(GitProvider): c.id == root_comment_id or c.raw_data.get("in_reply_to_id") == root_comment_id ] - # Sort chronologically - thread_comments.sort(key=lambda c: c.created_at) return thread_comments From c35942c12ba92a03e7fca1c89a0d72d322adffd2 Mon Sep 17 00:00:00 2001 From: Benedict Lee Date: Thu, 24 Apr 2025 11:23:16 +0900 Subject: [PATCH 12/12] mprove get_review_thread_comments method implementation Co-authored-by: ofir-frd <85901822+ofir-frd@users.noreply.github.com> --- pr_agent/git_providers/github_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index a526f594..485d5afe 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -460,7 +460,7 @@ class GithubProvider(GitProvider): return thread_comments except Exception as e: - get_logger().warning(f"Failed to get review comments for comment {comment_id}, error: {e}") + get_logger().exception(f"Failed to get review comments for an inline ask command", artifact={"comment_id": comment_id, "error": e}) return [] def _publish_inline_comments_fallback_with_verification(self, comments: list[dict]):