Add ask line feature

2025-07-21 04:50:39 +08:00 · 2024-02-15 14:25:22 +02:00
parent 4947c6b841
commit fff52e9e26
8 changed files with 255 additions and 0 deletions
--- a/pr_agent/agent/pr_agent.py
+++ b/pr_agent/agent/pr_agent.py
@ -14,6 +14,7 @@ from pr_agent.tools.pr_config import PRConfig
 from pr_agent.tools.pr_description import PRDescription
 from pr_agent.tools.pr_generate_labels import PRGenerateLabels
 from pr_agent.tools.pr_information_from_user import PRInformationFromUser
+from pr_agent.tools.pr_line_questions import PR_LineQuestions
 from pr_agent.tools.pr_questions import PRQuestions
 from pr_agent.tools.pr_reviewer import PRReviewer
 from pr_agent.tools.pr_similar_issue import PRSimilarIssue
@ -32,6 +33,7 @@ command2class = {
    "improve_code": PRCodeSuggestions,
    "ask": PRQuestions,
    "ask_question": PRQuestions,
+    "ask_line": PR_LineQuestions,
    "update_changelog": PRUpdateChangelog,
    "config": PRConfig,
    "settings": PRConfig,
--- a/pr_agent/algo/git_patch_processing.py
+++ b/pr_agent/algo/git_patch_processing.py
@ -245,3 +245,57 @@ __old hunk__
                patch_with_lines_str += f"{line_old}\n"

    return patch_with_lines_str.rstrip()
+
+
+def extract_hunk_lines_from_patch(patch: str, file_name, line_start, line_end, side) -> tuple[str, str]:
+    """
+    Extract the hunk(s) of `patch` that contain `line_start`, and the lines selected inside them.
+
+    Args:
+        patch: Unified-diff text of one file (zero or more hunks, each introduced by an '@@' header).
+        file_name: File path; used only for the "## file:" heading of the returned text.
+        line_start: First selected line number, counted on `side` of the diff.
+        line_end: Last selected line number (inclusive), counted on `side` of the diff.
+        side: 'left' (old file) or 'right' (new file), case-insensitive.
+
+    Returns:
+        A tuple `(patch_with_lines_str, selected_lines)`.
+        `patch_with_lines_str` holds the file heading, every hunk header, and the body of each
+        hunk whose header range contains `line_start`.
+        `selected_lines` holds the lines of those hunks that exist on `side` and whose line
+        number on that side lies in `[line_start, line_end]`.
+        Both strings are right-stripped.
+    """
+    patch_with_lines_str = f"\n\n## file: '{file_name.strip()}'\n\n"
+    selected_lines = ""
+    RE_HUNK_HEADER = re.compile(
+        r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
+    side = side.lower()
+    start1, size1, start2, size2 = -1, -1, -1, -1
+    # Line number the next body line has in the old file / in the new file.
+    old_line_no, new_line_no = -1, -1
+    skip_hunk = False
+    for line in patch.splitlines():
+        if 'no newline at end of file' in line.lower():
+            continue
+
+        if line.startswith('@@'):
+            skip_hunk = False
+            patch_with_lines_str += f'\n{line}\n'
+            match = RE_HUNK_HEADER.match(line)
+            if match is None:
+                # Malformed header: the hunk's position is unknown, so its body is ignored.
+                skip_hunk = True
+                continue
+
+            # A size omitted from the header ('@@ -0,0 +1 @@') is treated as 0.
+            start1, size1, start2, size2 = (int(g) if g is not None else 0 for g in match.groups()[:4])
+            old_line_no, new_line_no = start1, start2
+
+            # check if line range is in this hunk
+            if side == 'left':
+                skip_hunk = not (start1 <= line_start <= start1 + size1)
+            elif side == 'right':
+                skip_hunk = not (start2 <= line_start <= start2 + size2)
+
+        elif not skip_hunk:
+            # Removed lines exist only in the old file, added lines only in the new file.
+            in_old_file = not line.startswith('+')
+            in_new_file = not line.startswith('-')
+            if side == 'right' and in_new_file and line_start <= new_line_no <= line_end:
+                selected_lines += line + '\n'
+            if side == 'left' and in_old_file and line_start <= old_line_no <= line_end:
+                selected_lines += line + '\n'
+            patch_with_lines_str += line + '\n'
+            if in_old_file:
+                old_line_no += 1
+            if in_new_file:
+                new_line_no += 1
+
+    return patch_with_lines_str.rstrip(), selected_lines.rstrip()
--- a/pr_agent/config_loader.py
+++ b/pr_agent/config_loader.py
@ -18,6 +18,7 @@ global_settings = Dynaconf(
        "settings/language_extensions.toml",
        "settings/pr_reviewer_prompts.toml",
        "settings/pr_questions_prompts.toml",
+        "settings/pr_line_questions_prompts.toml",
        "settings/pr_description_prompts.toml",
        "settings/pr_code_suggestions_prompts.toml",
        "settings/pr_sort_code_suggestions_prompts.toml",
--- a/pr_agent/git_providers/git_provider.py
+++ b/pr_agent/git_providers/git_provider.py
@ -44,6 +44,9 @@ class GitProvider(ABC):
    def edit_comment(self, comment, body: str):
        pass

+    def reply_to_comment_from_comment_id(self, comment_id: int, body: str):
+        """Reply with `body` to the comment identified by `comment_id`.
+
+        The base implementation does nothing; a provider overrides it to post the reply.
+        """
+        pass
+
    def get_pr_description(self, *, full: bool = True) -> str:
        from pr_agent.config_loader import get_settings
        from pr_agent.algo.utils import clip_tokens
--- a/pr_agent/git_providers/github_provider.py
+++ b/pr_agent/git_providers/github_provider.py
@ -384,6 +384,16 @@ class GithubProvider(GitProvider):
    def edit_comment(self, comment, body: str):
        comment.edit(body=body)

+    def reply_to_comment_from_comment_id(self, comment_id: int, body: str):
+        """Post `body` as a reply to the pull-request review comment `comment_id`.
+
+        The request is sent as a raw POST through the PR object's requester.
+        Best effort: any exception is logged and swallowed, and nothing is returned.
+        """
+        try:
+            replies_url = (
+                f"https://api.github.com/repos/{self.repo}/pulls/{self.pr_num}/comments/{comment_id}/replies"
+            )
+            payload = {"body": body}
+            _headers, _data = self.pr._requester.requestJsonAndCheck("POST", replies_url, input=payload)
+        except Exception as e:
+            get_logger().exception(f"Failed to reply comment, error: {e}")
+
    def remove_initial_comment(self):
        try:
            for comment in getattr(self.pr, 'comments_list', []):
@ -448,6 +458,13 @@ class GithubProvider(GitProvider):
            return reaction.id
        except Exception as e:
            get_logger().exception(f"Failed to add eyes reaction, error: {e}")
+            try:
+                headers, data_patch = self.pr._requester.requestJsonAndCheck(
+                    "POST", f"https://api.github.com/repos/{self.repo}/pulls/comments/{issue_comment_id}/reactions",
+                    input={"content": "eyes"}
+                )
+            except:
+                pass
            return None

    def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool:
--- a/pr_agent/servers/github_app.py
+++ b/pr_agent/servers/github_app.py
@ -100,6 +100,9 @@ async def handle_request(body: Dict[str, Any], event: str):
            api_url = body["issue"]["pull_request"]["url"]
        elif "comment" in body and "pull_request_url" in body["comment"]:
            api_url = body["comment"]["pull_request_url"]
+            if 'subject_type' in body["comment"] and body["comment"]["subject_type"] == "line":
+                comment_body = await handle_line_comments(action, body, comment_body, event)
+
        else:
            return {}
        log_context["api_url"] = api_url
@ -190,6 +193,23 @@ async def handle_request(body: Dict[str, Any], event: str):
    return {}


+async def handle_line_comments(action, body, comment_body, event):
+    """
+    Turn an `/ask` comment placed on diff lines into an `/ask_line` command string.
+
+    The comment's diff hunk is stored in the settings under "ask_diff_hunk" on every call.
+    When `comment_body` contains '/ask', the returned command carries the line range, side,
+    file path and comment id read from the payload, followed by the question text.
+    Otherwise `comment_body` is returned unchanged.
+    """
+    comment = body["comment"]
+    first_line = comment["start_line"]
+    last_line = comment["line"]
+    # A falsy start line falls back to the end line, so the range covers a single line.
+    first_line = first_line or last_line
+    question = comment_body.replace('/ask', '').strip()
+    get_settings().set("ask_diff_hunk", comment["diff_hunk"])
+    path = comment["path"]
+    side = comment["side"]
+    comment_id = comment["id"]
+
+    if '/ask' not in comment_body:
+        return comment_body
+
+    get_logger().info(f"Handling line comment because of event={event} and action={action}")
+    return (
+        f"/ask_line --line_start={first_line} --line_end={last_line} --side={side} "
+        f"--file_name={path} --comment_id={comment_id} {question}"
+    )
+
+
 def _check_pull_request_event(action: str, body: dict, log_context: dict, bot_user: str) -> Tuple[Dict[str, Any], str]:
    invalid_result = {}, ""
    pull_request = body.get("pull_request")
--- a/pr_agent/settings/pr_line_questions_prompts.toml
+++ b/pr_agent/settings/pr_line_questions_prompts.toml
@ -0,0 +1,53 @@
+[pr_line_questions_prompt]
+system="""You are PR-Reviewer, a language model designed to answer questions about a Git Pull Request (PR).
+
+Your goal is to answer questions\\tasks about specific lines of code in the PR, and provide feedback.
+Be informative, constructive, and give examples. Try to be as specific as possible.
+Don't avoid answering the questions. You must answer the questions, as best as you can, without adding any unrelated content.
+
+Additional guidelines:
+- When quoting variables or names from the code, use backticks (`) instead of single quote (').
+- If relevant, use bullet points.
+- Be short and to the point.
+
+Example Hunk Structure:
+======
+## file: 'src/file1.py'
+
+@@ -12,5 +12,5 @@ def func1():
+code line 1 that remained unchanged in the PR
+code line 2 that remained unchanged in the PR
+-code line that was removed in the PR
++code line added in the PR
+code line 3 that remained unchanged in the PR
+======
+
+"""
+
+user="""PR Info:
+
+Title: '{{title}}'
+
+Branch: '{{branch}}'
+
+
+Here is a context hunk from the PR diff:
+======
+{{ full_hunk|trim }}
+======
+
+
+Now focus on the selected lines from the hunk:
+======
+{{ selected_lines|trim }}
+======
+Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines
+
+
+A question about the selected lines:
+======
+{{ question|trim }}
+======
+
+Response to the question:
+"""
--- a/pr_agent/tools/pr_line_questions.py
+++ b/pr_agent/tools/pr_line_questions.py
@ -0,0 +1,105 @@
+import argparse
+import copy
+from functools import partial
+
+from jinja2 import Environment, StrictUndefined
+
+from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
+from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
+from pr_agent.algo.git_patch_processing import convert_to_hunks_with_lines_numbers, \
+    extract_hunk_lines_from_patch
+from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
+from pr_agent.algo.token_handler import TokenHandler
+from pr_agent.algo.utils import ModelType
+from pr_agent.config_loader import get_settings
+from pr_agent.git_providers import get_git_provider
+from pr_agent.git_providers.git_provider import get_main_pr_language
+from pr_agent.log import get_logger
+from pr_agent.servers.ai_disclaimer import AiDisclaimer
+from pr_agent.servers.help import HelpMessage
+
+
+class PR_LineQuestions:
+    """
+    Answers a free-text question about selected lines of a pull request diff.
+
+    The question text comes from the command arguments. The line selection (diff hunk,
+    line range, side, file name, comment id) is read from the settings when `run` is called.
+    """
+
+    def __init__(self, pr_url: str, args=None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
+        """
+        Args:
+            pr_url: URL of the pull request; passed to the git provider.
+            args: Command arguments; joined with spaces to form the question.
+            ai_handler: Factory called with no arguments to build the AI handler.
+        """
+        self.question_str = self.parse_args(args)
+        self.git_provider = get_git_provider()(pr_url)
+
+        self.ai_handler = ai_handler()
+
+        self.vars = {
+            "title": self.git_provider.pr.title,
+            "branch": self.git_provider.get_pr_branch(),
+            "diff": "",  # empty diff for initial calculation
+            "question": self.question_str,
+            "full_hunk": "",
+            "selected_lines": "",
+        }
+        self.token_handler = TokenHandler(self.git_provider.pr,
+                                          self.vars,
+                                          get_settings().pr_line_questions_prompt.system,
+                                          get_settings().pr_line_questions_prompt.user)
+        self.patches_diff = None
+        self.prediction = None
+        # Created here so both attributes exist even if `run` finds no matching hunk.
+        self.patch_with_lines = ""
+        self.selected_lines = ""
+
+    def parse_args(self, args):
+        """Return the arguments joined with single spaces, or "" when there are none."""
+        return " ".join(args) if args else ""
+
+    async def run(self):
+        """
+        Locate the hunk for the selected lines, ask the model, and publish the answer.
+
+        The hunk is taken from the 'ask_diff_hunk' setting when it is set; otherwise the patch
+        of the PR file whose name equals the 'file_name' setting is used. The answer is sent as
+        a reply to the comment given by the 'comment_id' setting, or as a regular PR comment
+        when that setting is empty. Nothing is published when no hunk text is found.
+
+        Returns:
+            Always an empty string.
+        """
+        get_logger().info('Answering a PR lines question...')
+
+        self.patch_with_lines = ""
+        self.selected_lines = ""
+        ask_diff = get_settings().get('ask_diff_hunk', "")
+        line_start = get_settings().get('line_start', '')
+        line_end = get_settings().get('line_end', '')
+        side = get_settings().get('side', 'RIGHT')
+        file_name = get_settings().get('file_name', '')
+        comment_id = get_settings().get('comment_id', '')
+        if ask_diff:
+            self.patch_with_lines, self.selected_lines = extract_hunk_lines_from_patch(ask_diff,
+                                                                                       file_name,
+                                                                                       line_start=line_start,
+                                                                                       line_end=line_end,
+                                                                                       side=side)
+        else:
+            for file in self.git_provider.get_diff_files():
+                if file.filename == file_name:
+                    self.patch_with_lines, self.selected_lines = extract_hunk_lines_from_patch(file.patch,
+                                                                                               file.filename,
+                                                                                               line_start=line_start,
+                                                                                               line_end=line_end,
+                                                                                               side=side)
+                    break  # the requested file was found; no need to scan the rest
+
+        if not self.patch_with_lines:
+            get_logger().warning(f"No diff hunk found for file '{file_name}', skipping the line question")
+            return ""
+
+        response = await retry_with_fallback_models(self._get_prediction, model_type=ModelType.TURBO)
+
+        get_logger().info('Preparing answer...')
+        if comment_id:
+            self.git_provider.reply_to_comment_from_comment_id(comment_id, response)
+        else:
+            self.git_provider.publish_comment(response)
+
+        return ""
+
+    async def _get_prediction(self, model: str):
+        """Render the system and user prompts for the current hunk and return the reply of `model`."""
+        variables = copy.deepcopy(self.vars)
+        variables["full_hunk"] = self.patch_with_lines  # update diff
+        variables["selected_lines"] = self.selected_lines
+        environment = Environment(undefined=StrictUndefined)
+        system_prompt = environment.from_string(get_settings().pr_line_questions_prompt.system).render(variables)
+        user_prompt = environment.from_string(get_settings().pr_line_questions_prompt.user).render(variables)
+        if get_settings().config.verbosity_level >= 2:
+            # Sent to the logger rather than stdout, like every other message of this tool.
+            get_logger().info(f"\nSystem prompt:\n{system_prompt}")
+            get_logger().info(f"\nUser prompt:\n{user_prompt}")
+
+        response, _ = await self.ai_handler.chat_completion(model=model, temperature=0.2,
+                                                            system=system_prompt, user=user_prompt)
+        return response