Merge pull request #1687 from benedict-lee/feat/add-conversation-history-on-line-question

Improvement: Enhance ask_line tool by adding PR review comment threads as context
2025-07-21 04:50:39 +08:00 · 2025-04-24 09:32:43 +03:00
parent 6ceea2b134 c35942c12b
commit 5d5b57255e
5 changed files with 108 additions and 2 deletions
--- a/pr_agent/git_providers/git_provider.py
+++ b/pr_agent/git_providers/git_provider.py
@ -285,6 +285,9 @@ class GitProvider(ABC):

    def get_comment_url(self, comment) -> str:
        return ""
+           
+    def get_review_thread_comments(self, comment_id: int) -> list[dict]:
+        """
+        Return the comments of the review thread that contains `comment_id`.
+
+        Default implementation for providers that do not implement review-thread
+        lookup: no thread can be resolved, so an empty list is returned instead of
+        an implicit None. This keeps the result safe to iterate for any caller.
+
+        Args:
+            comment_id: Review comment ID
+
+        Returns:
+            An empty list.
+        """
+        return []

    #### labels operations ####
    @abstractmethod
--- a/pr_agent/git_providers/github_provider.py
+++ b/pr_agent/git_providers/github_provider.py
@ -427,7 +427,41 @@ class GithubProvider(GitProvider):
                self._publish_inline_comments_fallback_with_verification(comments)
            except Exception as e:
                get_logger().error(f"Failed to publish inline code comments fallback, error: {e}")
-                raise e
+                raise e    
+    
+    def get_review_thread_comments(self, comment_id: int) -> list[dict]:
+        """
+        Retrieves all comments in the same thread as the given comment.
+
+        A thread is the root review comment plus every comment whose
+        `in_reply_to_id` (read from the comment's raw data) points at that root.
+
+        Args:
+            comment_id: Review comment ID
+
+        Returns:
+            The comment objects yielded by `self.pr.get_comments()` that belong to
+            the thread, in the order they were yielded. An empty list is returned
+            when the comment is not found or when any error occurs (the error is
+            logged, not raised).
+        """
+        try:
+            # Materialize the PR's review comments once, since they are scanned twice below
+            all_comments = list(self.pr.get_comments())
+
+            # Find the target comment by ID
+            target_comment = next((c for c in all_comments if c.id == comment_id), None)
+            if target_comment is None:
+                return []
+
+            # A reply carries the root's ID in `in_reply_to_id`; a root comment has no
+            # such value. Using `or` instead of a .get() default also covers a key that
+            # is present but null - otherwise the root ID would be None and every
+            # comment without a reply target would match the filter below.
+            root_comment_id = target_comment.raw_data.get("in_reply_to_id") or target_comment.id
+
+            # Build the thread - include the root comment and all replies to it
+            return [
+                c for c in all_comments
+                if c.id == root_comment_id or c.raw_data.get("in_reply_to_id") == root_comment_id
+            ]
+        except Exception as e:
+            # Best effort: a failed lookup must not break the ask flow
+            get_logger().exception("Failed to get review comments for an inline ask command",
+                                   artifact={"comment_id": comment_id, "error": e})
+            return []

    def _publish_inline_comments_fallback_with_verification(self, comments: list[dict]):
        """
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@ -119,6 +119,7 @@ async_ai_calls=true

 [pr_questions] # /ask #
 enable_help_text=false
+use_conversation_history=true


 [pr_code_suggestions] # /improve #
--- a/pr_agent/settings/pr_line_questions_prompts.toml
+++ b/pr_agent/settings/pr_line_questions_prompts.toml
@ -43,6 +43,19 @@ Now focus on the selected lines from the hunk:
 ======
 Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines

+{%- if conversation_history %}
+
+Previous discussion on this code:
+======
+{{ conversation_history|trim }}
+======
+
+Consider this conversation history (format: "N. Username: Message", where numbers indicate the comment order). When responding:
+- Maintain consistency with previous technical explanations
+- Address unresolved issues from earlier discussions
+- Build upon existing knowledge without contradictions
+- Incorporate relevant context while focusing on the current question
+{%- endif %}

 A question about the selected lines:
 ======
--- a/pr_agent/tools/pr_line_questions.py
+++ b/pr_agent/tools/pr_line_questions.py
@ -14,10 +14,10 @@ from pr_agent.algo.utils import ModelType
 from pr_agent.config_loader import get_settings
 from pr_agent.git_providers import get_git_provider
 from pr_agent.git_providers.git_provider import get_main_pr_language
+from pr_agent.git_providers.github_provider import GithubProvider
 from pr_agent.log import get_logger
 from pr_agent.servers.help import HelpMessage

-
 class PR_LineQuestions:
    def __init__(self, pr_url: str, args=None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
        self.question_str = self.parse_args(args)
@ -35,6 +35,7 @@ class PR_LineQuestions:
            "question": self.question_str,
            "full_hunk": "",
            "selected_lines": "",
+            "conversation_history": "",  
        }
        self.token_handler = TokenHandler(self.git_provider.pr,
                                          self.vars,
@ -56,6 +57,12 @@ class PR_LineQuestions:
        # if get_settings().config.publish_output:
        #     self.git_provider.publish_comment("Preparing answer...", is_temporary=True)

+        # set conversation history if enabled
+        # currently only supports GitHub provider
+        if get_settings().pr_questions.use_conversation_history and isinstance(self.git_provider, GithubProvider):
+            conversation_history = self._load_conversation_history()
+            self.vars["conversation_history"] = conversation_history
+
        self.patch_with_lines = ""
        ask_diff = get_settings().get('ask_diff_hunk', "")
        line_start = get_settings().get('line_start', '')
@ -92,6 +99,54 @@ class PR_LineQuestions:
                self.git_provider.publish_comment(model_answer_sanitized)

        return ""
+        
+    def _load_conversation_history(self) -> str:
+        """Build the conversation history string from the current review thread.
+
+        Reads `comment_id`, `file_name` and `line_end` from the settings. When any
+        of them is missing, an error is logged and no history is produced.
+
+        Returns:
+            str: The thread comments as a numbered list ("N. author: body", one
+                entry per comment), or "" when there is nothing to report or an
+                error occurred.
+        """
+        comment_id, file_path, line_number = (
+            get_settings().get(key, '') for key in ('comment_id', 'file_name', 'line_end')
+        )
+
+        # all three settings must be present, otherwise bail out early
+        if not (comment_id and file_path and line_number):
+            get_logger().error("Missing required parameters for conversation history")
+            return ""
+
+        try:
+            history_entries = []
+            for thread_comment in self.git_provider.get_review_thread_comments(comment_id):
+                text = getattr(thread_comment, 'body', '')
+
+                # drop blank comments and the comment being answered
+                # (that one reaches the prompt as the question itself)
+                is_blank = not text or not text.strip()
+                if is_blank or comment_id == thread_comment.id:
+                    continue
+
+                commenter = thread_comment.user
+                login = commenter.login if hasattr(commenter, 'login') else 'Unknown'
+                history_entries.append((login, text))
+
+            if not history_entries:
+                return ""
+
+            get_logger().info(f"Loaded {len(history_entries)} comments from the code review thread")
+
+            # numbered list, counting from 1, one line group per comment
+            numbered = (f"{position}. {login}: {text}"
+                        for position, (login, text) in enumerate(history_entries, start=1))
+            return "\n".join(numbered)
+
+        except Exception as e:
+            get_logger().error(f"Error processing conversation history, error: {e}")
+            return ""

    async def _get_prediction(self, model: str):
        variables = copy.deepcopy(self.vars)