Merge pull request #661 from Codium-ai/hl/ask_line

Hl/ask line
2025-07-21 04:50:39 +08:00 · 2024-02-17 22:08:55 -08:00
parent cd8a40c7a6 5918943959
commit 7b15101051
17 changed files with 321 additions and 11 deletions
--- a/docs/ASK.md
+++ b/docs/ASK.md
@ -12,4 +12,13 @@ ___
 <kbd><img src=https://codium.ai/images/pr_agent/ask.png width="768"></kbd>
 ___

+## Ask lines
+You can run `/ask` on specific lines of code in the PR from the PR's diff view. The tool will answer questions based on the code changes in the selected lines.
+- Click on the '+' sign next to the line number to select the line.
+- To select multiple lines, click on the '+' sign of the first line and then hold and drag to select the rest of the lines. 
+- Write `/ask "..."` in the comment box and press the `Add single comment` button.
+
+<kbd><img src=https://codium.ai/images/pr_agent/Ask_line.png width="768"></kbd>
+
+
 Note that the tool does not have "memory" of previous questions, and answers each question independently.
--- a/pr_agent/agent/pr_agent.py
+++ b/pr_agent/agent/pr_agent.py
@ -14,6 +14,7 @@ from pr_agent.tools.pr_config import PRConfig
 from pr_agent.tools.pr_description import PRDescription
 from pr_agent.tools.pr_generate_labels import PRGenerateLabels
 from pr_agent.tools.pr_information_from_user import PRInformationFromUser
+from pr_agent.tools.pr_line_questions import PR_LineQuestions
 from pr_agent.tools.pr_questions import PRQuestions
 from pr_agent.tools.pr_reviewer import PRReviewer
 from pr_agent.tools.pr_similar_issue import PRSimilarIssue
@ -32,6 +33,7 @@ command2class = {
    "improve_code": PRCodeSuggestions,
    "ask": PRQuestions,
    "ask_question": PRQuestions,
+    "ask_line": PR_LineQuestions,
    "update_changelog": PRUpdateChangelog,
    "config": PRConfig,
    "settings": PRConfig,
--- a/pr_agent/algo/git_patch_processing.py
+++ b/pr_agent/algo/git_patch_processing.py
@ -245,3 +245,59 @@ __old hunk__
                patch_with_lines_str += f"{line_old}\n"

    return patch_with_lines_str.rstrip()
+
+
+def extract_hunk_lines_from_patch(patch: str, file_name, line_start, line_end, side) -> tuple[str, str]:
+    """
+    Extract the hunk(s) of a patch that contain a requested line, plus the selected lines themselves.
+
+    Args:
+        patch: unified-diff text of a single file (one or more '@@' hunks).
+        file_name: path of the file; only used for the "## file:" heading of the output.
+        line_start: first selected line number (old-file numbering for side 'LEFT',
+            new-file numbering for side 'RIGHT'). Numeric strings are accepted.
+        line_end: last selected line number (inclusive), same numbering as line_start.
+        side: 'LEFT' or 'RIGHT', case-insensitive. For any other value every hunk is
+            kept and no lines are selected.
+
+    Returns:
+        A tuple (patch_with_lines_str, selected_lines): the file heading followed by each
+        hunk whose header range contains line_start, and the lines of those hunks whose
+        position falls inside [line_start, line_end]. Both strings are right-stripped.
+    """
+    # the values may arrive as strings when they were parsed from command arguments
+    line_start, line_end = int(line_start), int(line_end)
+    side = side.lower()
+
+    patch_with_lines_str = f"\n\n## file: '{file_name.strip()}'\n\n"
+    selected_lines = ""
+    RE_HUNK_HEADER = re.compile(
+        r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)")
+    start1, start2 = -1, -1
+    skip_hunk = False
+    old_offset = 0  # lines of the old file seen so far in the current hunk (context + '-')
+    new_offset = 0  # lines of the new file seen so far in the current hunk (context + '+')
+    for line in patch.splitlines():
+        if 'no newline at end of file' in line.lower():
+            continue
+
+        if line.startswith('@@'):
+            skip_hunk = False
+            old_offset = new_offset = 0
+
+            match = RE_HUNK_HEADER.match(line)
+            if match is None:
+                # unparsable header: we cannot place its lines, so drop the hunk instead of crashing
+                skip_hunk = True
+                continue
+
+            # a missing count (e.g. '@@ -0,0 +1 @@') is treated as 0
+            start1, size1, start2, size2 = (int(g) if g else 0 for g in match.groups()[:4])
+
+            # check if line range is in this hunk
+            if side == 'left':
+                in_range = start1 <= line_start <= start1 + size1
+            elif side == 'right':
+                in_range = start2 <= line_start <= start2 + size2
+            else:
+                in_range = True
+            if not in_range:
+                skip_hunk = True
+                continue
+            patch_with_lines_str += f'\n{line}\n'
+
+        elif not skip_hunk:
+            if side == 'right' and line_start <= start2 + new_offset <= line_end:
+                selected_lines += line + '\n'
+            if side == 'left' and line_start <= start1 + old_offset <= line_end:
+                selected_lines += line + '\n'
+            patch_with_lines_str += line + '\n'
+            # removed lines do not exist in the new file; added lines do not exist in the old one
+            if not line.startswith('-'):
+                new_offset += 1
+            if not line.startswith('+'):
+                old_offset += 1
+
+    return patch_with_lines_str.rstrip(), selected_lines.rstrip()
--- a/pr_agent/config_loader.py
+++ b/pr_agent/config_loader.py
@ -18,6 +18,7 @@ global_settings = Dynaconf(
        "settings/language_extensions.toml",
        "settings/pr_reviewer_prompts.toml",
        "settings/pr_questions_prompts.toml",
+        "settings/pr_line_questions_prompts.toml",
        "settings/pr_description_prompts.toml",
        "settings/pr_code_suggestions_prompts.toml",
        "settings/pr_sort_code_suggestions_prompts.toml",
--- a/pr_agent/git_providers/azuredevops_provider.py
+++ b/pr_agent/git_providers/azuredevops_provider.py
@ -479,7 +479,7 @@ class AzureDevopsProvider(GitProvider):
            "Azure DevOps provider does not support issue comments yet"
        )

-    def add_eyes_reaction(self, issue_comment_id: int) -> Optional[int]:
+    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:
        return True

    def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool:
--- a/pr_agent/git_providers/bitbucket_provider.py
+++ b/pr_agent/git_providers/bitbucket_provider.py
@ -298,7 +298,7 @@ class BitbucketProvider(GitProvider):
            "Bitbucket provider does not support issue comments yet"
        )

-    def add_eyes_reaction(self, issue_comment_id: int) -> Optional[int]:
+    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:
        return True

    def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool:
--- a/pr_agent/git_providers/bitbucket_server_provider.py
+++ b/pr_agent/git_providers/bitbucket_server_provider.py
@ -288,7 +288,7 @@ class BitbucketServerProvider(GitProvider):
            "Bitbucket provider does not support issue comments yet"
        )

-    def add_eyes_reaction(self, issue_comment_id: int) -> Optional[int]:
+    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:
        return True

    def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool:
--- a/pr_agent/git_providers/codecommit_provider.py
+++ b/pr_agent/git_providers/codecommit_provider.py
@ -297,7 +297,7 @@ class CodeCommitProvider(GitProvider):
        settings_filename = ".pr_agent.toml"
        return self.codecommit_client.get_file(self.repo_name, settings_filename, self.pr.source_commit, optional=True)

-    def add_eyes_reaction(self, issue_comment_id: int) -> Optional[int]:
+    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:
        get_logger().info("CodeCommit provider does not support eyes reaction yet")
        return True

--- a/pr_agent/git_providers/gerrit_provider.py
+++ b/pr_agent/git_providers/gerrit_provider.py
@ -212,7 +212,7 @@ class GerritProvider(GitProvider):
        raise NotImplementedError(
            'Getting labels is not implemented for the gerrit provider')

-    def add_eyes_reaction(self, issue_comment_id: int):
+    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False):
        raise NotImplementedError(
            'Adding reactions is not implemented for the gerrit provider')

--- a/pr_agent/git_providers/git_provider.py
+++ b/pr_agent/git_providers/git_provider.py
@ -44,6 +44,9 @@ class GitProvider(ABC):
    def edit_comment(self, comment, body: str):
        pass

+    def reply_to_comment_from_comment_id(self, comment_id: int, body: str):
+        pass
+
    def get_pr_description(self, *, full: bool = True) -> str:
        from pr_agent.config_loader import get_settings
        from pr_agent.algo.utils import clip_tokens
@ -159,7 +162,7 @@ class GitProvider(ABC):
        pass

    @abstractmethod
-    def add_eyes_reaction(self, issue_comment_id: int) -> Optional[int]:
+    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:
        pass

    @abstractmethod
--- a/pr_agent/git_providers/github_provider.py
+++ b/pr_agent/git_providers/github_provider.py
@ -384,6 +384,16 @@ class GithubProvider(GitProvider):
    def edit_comment(self, comment, body: str):
        comment.edit(body=body)

+    def reply_to_comment_from_comment_id(self, comment_id: int, body: str):
+        """
+        Post `body` as a reply to the pull-request review comment `comment_id`.
+
+        Any failure is logged and swallowed; nothing is returned.
+        """
+        try:
+            replies_url = (
+                f"https://api.github.com/repos/{self.repo}/pulls/{self.pr_num}"
+                f"/comments/{comment_id}/replies"
+            )
+            payload = {"body": body}
+            headers, data_patch = self.pr._requester.requestJsonAndCheck("POST", replies_url, input=payload)
+        except Exception as e:
+            get_logger().exception(f"Failed to reply comment, error: {e}")
+
    def remove_initial_comment(self):
        try:
            for comment in getattr(self.pr, 'comments_list', []):
@ -442,12 +452,21 @@ class GithubProvider(GitProvider):
        except Exception:
            return ""

-    def add_eyes_reaction(self, issue_comment_id: int) -> Optional[int]:
+    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:
+        if disable_eyes:
+            return None
        try:
            reaction = self.pr.get_issue_comment(issue_comment_id).create_reaction("eyes")
            return reaction.id
        except Exception as e:
            get_logger().exception(f"Failed to add eyes reaction, error: {e}")
+            try:
+                headers, data_patch = self.pr._requester.requestJsonAndCheck(
+                    "POST", f"https://api.github.com/repos/{self.repo}/pulls/comments/{issue_comment_id}/reactions",
+                    input={"content": "eyes"}
+                )
+            except:
+                pass
            return None

    def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool:
--- a/pr_agent/git_providers/gitlab_provider.py
+++ b/pr_agent/git_providers/gitlab_provider.py
@ -181,6 +181,10 @@ class GitLabProvider(GitProvider):
    def edit_comment(self, comment, body: str):
        self.mr.notes.update(comment.id,{'body': body} )

+    def reply_to_comment_from_comment_id(self, comment_id: int, body: str):
+        """Add a note containing `body` to the merge-request discussion identified by `comment_id`."""
+        reply = {'body': body}
+        self.mr.discussions.get(comment_id).notes.create(reply)
+
    def publish_inline_comment(self, body: str, relevant_file: str, relevant_line_in_file: str):
        edit_type, found, source_line_no, target_file, target_line_no = self.search_line(relevant_file,
                                                                                         relevant_line_in_file)
@ -364,7 +368,7 @@ class GitLabProvider(GitProvider):
        except Exception:
            return ""

-    def add_eyes_reaction(self, issue_comment_id: int) -> Optional[int]:
+    def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) -> Optional[int]:
        return True

    def remove_reaction(self, issue_comment_id: int, reaction_id: int) -> bool:
--- a/pr_agent/servers/github_action_runner.py
+++ b/pr_agent/servers/github_action_runner.py
@ -8,6 +8,7 @@ from pr_agent.config_loader import get_settings
 from pr_agent.git_providers import get_git_provider
 from pr_agent.git_providers.utils import apply_repo_settings
 from pr_agent.log import get_logger
+from pr_agent.servers.github_app import handle_line_comments
 from pr_agent.tools.pr_code_suggestions import PRCodeSuggestions
 from pr_agent.tools.pr_description import PRDescription
 from pr_agent.tools.pr_reviewer import PRReviewer
@ -102,24 +103,38 @@ async def run_action():
                    await PRCodeSuggestions(pr_url).run()

    # Handle issue comment event
-    elif GITHUB_EVENT_NAME == "issue_comment":
+    elif GITHUB_EVENT_NAME == "issue_comment" or GITHUB_EVENT_NAME == "pull_request_review_comment":
        action = event_payload.get("action")
        if action in ["created", "edited"]:
            comment_body = event_payload.get("comment", {}).get("body")
+            try:
+                if GITHUB_EVENT_NAME == "pull_request_review_comment":
+                    if '/ask' in comment_body:
+                        comment_body = handle_line_comments(event_payload, comment_body)
+            except Exception as e:
+                get_logger().error(f"Failed to handle line comments: {e}")
+                return
            if comment_body:
                is_pr = False
+                disable_eyes = False
                # check if issue is pull request
                if event_payload.get("issue", {}).get("pull_request"):
                    url = event_payload.get("issue", {}).get("pull_request", {}).get("url")
                    is_pr = True
+                elif event_payload.get("comment", {}).get("pull_request_url"): # for 'pull_request_review_comment
+                    url = event_payload.get("comment", {}).get("pull_request_url")
+                    is_pr = True
+                    disable_eyes = True
                else:
                    url = event_payload.get("issue", {}).get("url")
+
                if url:
                    body = comment_body.strip().lower()
                    comment_id = event_payload.get("comment", {}).get("id")
                    provider = get_git_provider()(pr_url=url)
                    if is_pr:
-                        await PRAgent().handle_request(url, body, notify=lambda: provider.add_eyes_reaction(comment_id))
+                        await PRAgent().handle_request(url, body,
+                                    notify=lambda: provider.add_eyes_reaction(comment_id, disable_eyes=disable_eyes))
                    else:
                        await PRAgent().handle_request(url, body)

--- a/pr_agent/servers/github_app.py
+++ b/pr_agent/servers/github_app.py
@ -96,10 +96,19 @@ async def handle_request(body: Dict[str, Any], event: str):
            get_logger().info(f"Ignoring comment from {bot_user} user")
            return {}
        get_logger().info(f"Processing comment from {sender} user")
+        disable_eyes = False
        if "issue" in body and "pull_request" in body["issue"] and "url" in body["issue"]["pull_request"]:
            api_url = body["issue"]["pull_request"]["url"]
        elif "comment" in body and "pull_request_url" in body["comment"]:
            api_url = body["comment"]["pull_request_url"]
+            try:
+                if ('/ask' in comment_body and
+                        'subject_type' in body["comment"] and body["comment"]["subject_type"] == "line"):
+                    comment_body = handle_line_comments(body, comment_body)
+                    disable_eyes = True
+            except Exception as e:
+                get_logger().error(f"Failed to handle line comments: {e}")
+
        else:
            return {}
        log_context["api_url"] = api_url
@ -108,7 +117,8 @@ async def handle_request(body: Dict[str, Any], event: str):
        comment_id = body.get("comment", {}).get("id")
        provider = get_git_provider()(pr_url=api_url)
        with get_logger().contextualize(**log_context):
-            await agent.handle_request(api_url, comment_body, notify=lambda: provider.add_eyes_reaction(comment_id))
+            await agent.handle_request(api_url, comment_body,
+                            notify=lambda: provider.add_eyes_reaction(comment_id, disable_eyes=disable_eyes))

    # handle pull_request event:
    #   automatically review opened/reopened/ready_for_review PRs as long as they're not in draft,
@ -190,6 +200,22 @@ async def handle_request(body: Dict[str, Any], event: str):
    return {}


+def handle_line_comments(body, comment_body):
+    """
+    Turn an `/ask` review comment written on diff lines into an `/ask_line` command.
+
+    Reads the line range, diff hunk, path, side and id from body["comment"], stores the
+    diff hunk in the settings under "ask_diff_hunk", and returns the rewritten command.
+    If `comment_body` does not contain '/ask' it is returned unchanged.
+    """
+    comment = body["comment"]
+    start_line = comment["start_line"]
+    end_line = comment["line"]
+    if not start_line:
+        # no start line given: the range collapses to the single commented line
+        start_line = end_line
+    question = comment_body.replace('/ask', '').strip()
+    get_settings().set("ask_diff_hunk", comment["diff_hunk"])
+    path = comment["path"]
+    side = comment["side"]
+    comment_id = comment["id"]
+    if '/ask' not in comment_body:
+        return comment_body
+    return f"/ask_line --line_start={start_line} --line_end={end_line} --side={side} --file_name={path} --comment_id={comment_id} {question}"
+
+
 def _check_pull_request_event(action: str, body: dict, log_context: dict, bot_user: str) -> Tuple[Dict[str, Any], str]:
    invalid_result = {}, ""
    pull_request = body.get("pull_request")
--- a/pr_agent/servers/gitlab_webhook.py
+++ b/pr_agent/servers/gitlab_webhook.py
@ -64,7 +64,25 @@ async def gitlab_webhook(background_tasks: BackgroundTasks, request: Request):
            mr = data['merge_request']
            url = mr.get('url')
            body = data.get('object_attributes', {}).get('note')
+            if data.get('object_attributes', {}).get('type') == 'DiffNote' and '/ask' in body:
+                line_range_ = data['object_attributes']['position']['line_range']
+
+                # if line_range_['start']['type'] == 'new':
+                start_line = line_range_['start']['new_line']
+                end_line = line_range_['end']['new_line']
+                # else:
+                #     start_line = line_range_['start']['old_line']
+                #     end_line = line_range_['end']['old_line']
+
+                question = body.replace('/ask', '').strip()
+                path = data['object_attributes']['position']['new_path']
+                side = 'RIGHT'# if line_range_['start']['type'] == 'new' else 'LEFT'
+                comment_id = data['object_attributes']["discussion_id"]
+                get_logger().info(f"Handling line comment")
+                body = f"/ask_line --line_start={start_line} --line_end={end_line} --side={side} --file_name={path} --comment_id={comment_id} {question}"
+
            handle_request(background_tasks, url, body, log_context)
+
    return JSONResponse(status_code=status.HTTP_200_OK, content=jsonable_encoder({"message": "success"}))


--- a/pr_agent/settings/pr_line_questions_prompts.toml
+++ b/pr_agent/settings/pr_line_questions_prompts.toml
@ -0,0 +1,53 @@
+[pr_line_questions_prompt]
+system="""You are PR-Reviewer, a language model designed to answer questions about a Git Pull Request (PR).
+
+Your goal is to answer questions/tasks about specific lines of code in the PR, and provide feedback.
+Be informative, constructive, and give examples. Try to be as specific as possible.
+Don't avoid answering the questions. You must answer the questions, as best as you can, without adding any unrelated content.
+
+Additional guidelines:
+- When quoting variables or names from the code, use backticks (`) instead of single quote (').
+- If relevant, use bullet points.
+- Be short and to the point.
+
+Example Hunk Structure:
+======
+## file: 'src/file1.py'
+
+@@ -12,5 +12,5 @@ def func1():
+ code line 1 that remained unchanged in the PR
+ code line 2 that remained unchanged in the PR
+-code line that was removed in the PR
++code line added in the PR
+ code line 3 that remained unchanged in the PR
+======
+
+"""
+
+user="""PR Info:
+
+Title: '{{title}}'
+
+Branch: '{{branch}}'
+
+
+Here is a context hunk from the PR diff:
+======
+{{ full_hunk|trim }}
+======
+
+
+Now focus on the selected lines from the hunk:
+======
+{{ selected_lines|trim }}
+======
+Note that lines in the diff body are prefixed with a symbol that represents the type of change: '-' for deletions, '+' for additions, and ' ' (a space) for unchanged lines
+
+
+A question about the selected lines:
+======
+{{ question|trim }}
+======
+
+Response to the question:
+"""
--- a/pr_agent/tools/pr_line_questions.py
+++ b/pr_agent/tools/pr_line_questions.py
@ -0,0 +1,104 @@
+import argparse
+import copy
+from functools import partial
+
+from jinja2 import Environment, StrictUndefined
+
+from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
+from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
+from pr_agent.algo.git_patch_processing import convert_to_hunks_with_lines_numbers, \
+    extract_hunk_lines_from_patch
+from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
+from pr_agent.algo.token_handler import TokenHandler
+from pr_agent.algo.utils import ModelType
+from pr_agent.config_loader import get_settings
+from pr_agent.git_providers import get_git_provider
+from pr_agent.git_providers.git_provider import get_main_pr_language
+from pr_agent.log import get_logger
+from pr_agent.servers.help import HelpMessage
+
+
+class PR_LineQuestions:
+    """
+    Tool behind the `/ask_line` command: answers a question about selected lines of a PR diff.
+
+    The selected range is read from the settings ('line_start', 'line_end', 'side',
+    'file_name', optional 'ask_diff_hunk' and 'comment_id'); the question text comes
+    from the command arguments.
+    """
+
+    def __init__(self, pr_url: str, args=None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler):
+        """
+        Args:
+            pr_url: URL of the pull request the question refers to.
+            args: command arguments; joined with spaces they form the question.
+            ai_handler: factory for the AI handler used to produce the answer.
+        """
+        self.question_str = self.parse_args(args)
+        self.git_provider = get_git_provider()(pr_url)
+
+        self.ai_handler = ai_handler()
+
+        self.vars = {
+            "title": self.git_provider.pr.title,
+            "branch": self.git_provider.get_pr_branch(),
+            "diff": "",  # empty diff for initial calculation
+            "question": self.question_str,
+            "full_hunk": "",
+            "selected_lines": "",
+        }
+        self.token_handler = TokenHandler(self.git_provider.pr,
+                                          self.vars,
+                                          get_settings().pr_line_questions_prompt.system,
+                                          get_settings().pr_line_questions_prompt.user)
+        self.patches_diff = None
+        self.prediction = None
+        # filled in by run(); defined here so the attributes always exist
+        self.patch_with_lines = ""
+        self.selected_lines = ""
+
+    def parse_args(self, args):
+        """Return the question: the arguments joined by single spaces, or "" when there are none."""
+        if args:
+            return " ".join(args)
+        return ""
+
+    async def run(self):
+        """
+        Locate the hunk for the requested lines, ask the model, and publish the answer.
+
+        The answer is posted as a reply to 'comment_id' when that setting is present,
+        otherwise as a regular PR comment. Nothing is published when no hunk was found.
+        Always returns "".
+        """
+        get_logger().info('Answering a PR lines question...')
+
+        self.patch_with_lines = ""
+        self.selected_lines = ""
+        ask_diff = get_settings().get('ask_diff_hunk', "")
+        line_start = get_settings().get('line_start', '')
+        line_end = get_settings().get('line_end', '')
+        side = get_settings().get('side', 'RIGHT')
+        file_name = get_settings().get('file_name', '')
+        comment_id = get_settings().get('comment_id', '')
+        if ask_diff:
+            # the diff hunk was supplied with the comment, so it is used directly
+            self.patch_with_lines, self.selected_lines = extract_hunk_lines_from_patch(ask_diff,
+                                                                                       file_name,
+                                                                                       line_start=line_start,
+                                                                                       line_end=line_end,
+                                                                                       side=side
+                                                                                       )
+        else:
+            # no hunk supplied: look the file up in the PR diff
+            for file in self.git_provider.get_diff_files():
+                if file.filename == file_name:
+                    self.patch_with_lines, self.selected_lines = extract_hunk_lines_from_patch(file.patch,
+                                                                                               file.filename,
+                                                                                               line_start=line_start,
+                                                                                               line_end=line_end,
+                                                                                               side=side)
+                    break
+
+        if not self.patch_with_lines:
+            get_logger().warning(f"No diff hunk found for file '{file_name}', skipping line question")
+            return ""
+
+        response = await retry_with_fallback_models(self._get_prediction, model_type=ModelType.TURBO)
+
+        get_logger().info('Preparing answer...')
+        if comment_id:
+            self.git_provider.reply_to_comment_from_comment_id(comment_id, response)
+        else:
+            self.git_provider.publish_comment(response)
+
+        return ""
+
+    async def _get_prediction(self, model: str):
+        """Render the system and user prompts with the extracted hunk and return the model's response."""
+        variables = copy.deepcopy(self.vars)
+        variables["full_hunk"] = self.patch_with_lines  # update diff
+        variables["selected_lines"] = self.selected_lines
+        environment = Environment(undefined=StrictUndefined)
+        system_prompt = environment.from_string(get_settings().pr_line_questions_prompt.system).render(variables)
+        user_prompt = environment.from_string(get_settings().pr_line_questions_prompt.user).render(variables)
+        if get_settings().config.verbosity_level >= 2:
+            get_logger().info(f"\nSystem prompt:\n{system_prompt}")
+            get_logger().info(f"\nUser prompt:\n{user_prompt}")
+
+        response, finish_reason = await self.ai_handler.chat_completion(model=model, temperature=0.2,
+                                                                        system=system_prompt, user=user_prompt)
+        return response