Merge pull request #476 from Codium-ai/tr/improve_inplace

Enhancements and Bug Fixes in Code Suggestions and Line Link Generation
2025-07-13 17:20:38 +08:00 · 2023-11-26 07:32:37 -08:00
parent f104b70703 466af37675
commit fc8865f8dc
13 changed files with 76 additions and 14 deletions
--- a/docs/IMPROVE.md
+++ b/docs/IMPROVE.md
@ -33,6 +33,16 @@ Under the section 'pr_code_suggestions', the [configuration file](./../pr_agent/
 - `max_number_of_calls`: maximum number of chunks. Default is 5.
 - `final_clip_factor`: factor to remove suggestions with low confidence. Default is 0.9.

+#### summarize mode
+- `summarize`: if set to true, the tool will publish the code suggestions as a single summarized comment instead of inline suggestions. Default is false.
+
+In this mode, instead of presenting committable suggestions, the different suggestions will be combined into a single compact comment, with a significantly smaller PR footprint.
+
+For example:
+
+`/improve --pr_code_suggestions.summarize=true`
+
+<kbd><img src=./../pics/improved_summerize_open.png width="768"></kbd>

 #### A note on code suggestions quality

--- a/pics/improved_summerize_closed.png
+++ b/pics/improved_summerize_closed.png
--- a/pics/improved_summerize_open.png
+++ b/pics/improved_summerize_open.png
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@ -58,7 +58,8 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool=True) -> str:
            emoji = emojis.get(key, "")
            if key.lower() == 'code feedback':
                if gfm_supported:
-                    markdown_text += f"\n\n- **<details><summary> { emoji } Code feedback:**</summary>\n\n"
+                    markdown_text += f"\n\n- "
+                    markdown_text += f"<details><summary> { emoji } Code feedback:</summary>\n\n"
                else:
                    markdown_text += f"\n\n- **{emoji} Code feedback:**\n\n"
            else:
@ -99,9 +100,9 @@ def parse_code_suggestion(code_suggestions: dict, gfm_supported: bool=True) -> s
                markdown_text += f"    - **{code_key}:**\n{code_str_indented}\n"
        else:
            if "relevant file" in sub_key.lower():
-                markdown_text += f"\n  - **{sub_key}:** {sub_value}\n"
+                markdown_text += f"\n  - **{sub_key}:** {sub_value}  \n"
            else:
-                markdown_text += f"   **{sub_key}:** {sub_value}\n"
+                markdown_text += f"   **{sub_key}:** {sub_value}  \n"
            if not gfm_supported:
                if "relevant line" not in sub_key.lower(): # nicer presentation
                        # markdown_text = markdown_text.rstrip('\n') + "\\\n" # works for gitlab
--- a/pr_agent/git_providers/bitbucket_provider.py
+++ b/pr_agent/git_providers/bitbucket_provider.py
@ -228,6 +228,10 @@ class BitbucketProvider(GitProvider):
        )
        return response

+    def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str:
+        """
+        Build a link to a line of a file in this pull request.
+
+        The link is the PR URL followed by a '#L<file>T<line>' anchor. Only the start
+        line is encoded; relevant_line_end is accepted but not used.
+        """
+        return f"{self.pr_url}/#L{relevant_file}T{relevant_line_start}"
+
    def generate_link_to_relevant_line_number(self, suggestion) -> str:
        try:
            relevant_file = suggestion['relevant file'].strip('`').strip("'")
--- a/pr_agent/git_providers/git_provider.py
+++ b/pr_agent/git_providers/git_provider.py
@ -89,6 +89,9 @@ class GitProvider(ABC):
    def get_pr_id(self):
        return ""

+    def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str:
+        """
+        Return a link to the given line (or line range) of a file in the PR.
+
+        This default implementation builds no link and always returns an empty string.
+        """
+        return ""
+
    #### comments operations ####
    @abstractmethod
    def publish_comment(self, pr_comment: str, is_temporary: bool = False):
--- a/pr_agent/git_providers/github_provider.py
+++ b/pr_agent/git_providers/github_provider.py
@ -501,6 +501,15 @@ class GithubProvider(GitProvider):

        return ""

+    def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str:
+        """
+        Build a link to a line (or line range) of a file in the PR's 'files' view.
+
+        The anchor has the form 'diff-<sha256 of the file path>R<start>', with
+        '-R<end>' appended when relevant_line_end is truthy.
+        """
+        file_digest = hashlib.sha256(relevant_file.encode('utf-8')).hexdigest()
+        anchor = f"diff-{file_digest}R{relevant_line_start}"
+        if relevant_line_end:
+            anchor += f"-R{relevant_line_end}"
+        return f"https://github.com/{self.repo}/pull/{self.pr_num}/files#{anchor}"
+
+
    def get_pr_id(self):
        try:
            pr_id = f"{self.repo}/{self.pr_num}"
--- a/pr_agent/git_providers/gitlab_provider.py
+++ b/pr_agent/git_providers/gitlab_provider.py
@ -43,7 +43,7 @@ class GitLabProvider(GitProvider):
        self.incremental = incremental

    def is_supported(self, capability: str) -> bool:
-        if capability in ['get_issue_comments', 'create_inline_comment', 'publish_inline_comments', 'gfm_markdown']:
+        if capability in ['get_issue_comments', 'create_inline_comment', 'publish_inline_comments']: # gfm_markdown is supported in gitlab !
            return False
        return True

@ -422,6 +422,14 @@ class GitLabProvider(GitProvider):
        except:
            return ""

+    def get_line_link(self, relevant_file: str, relevant_line_start: int, relevant_line_end: int = None) -> str:
+        """
+        Build a link to a line (or line range) of a file on the MR's source branch.
+
+        The project URL is derived from the merge request's own `web_url` rather than
+        being hard-coded to a single project, so the link points at the project the
+        merge request actually belongs to.
+
+        Args:
+            relevant_file: path of the file inside the repository.
+            relevant_line_start: first line to link to.
+            relevant_line_end: optional last line; when truthy a '-L<end>' range is appended.
+
+        Returns:
+            The blob URL with an '#L<start>' or '#L<start>-L<end>' anchor, or "" when the
+            project URL cannot be derived from the merge request.
+        """
+        mr_url = getattr(self.mr, 'web_url', None) or ""
+        # NOTE(review): assumes web_url looks like '<project_url>/-/merge_requests/<iid>'
+        # (older GitLab versions omit the '/-' segment) - confirm against the GitLab API.
+        project_url, separator, _ = mr_url.rpartition('/merge_requests/')
+        if not separator:
+            return ""
+        if project_url.endswith('/-'):
+            project_url = project_url[:-2]
+        link = f"{project_url}/-/blob/{self.mr.source_branch}/{relevant_file}?ref_type=heads#L{relevant_line_start}"
+        if relevant_line_end:
+            link += f"-L{relevant_line_end}"
+        return link
+
+
    def generate_link_to_relevant_line_number(self, suggestion) -> str:
        try:
            relevant_file = suggestion['relevant file'].strip('`').strip("'")
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@ -57,6 +57,7 @@ include_generated_by_header=true

 [pr_code_suggestions] # /improve #
 num_code_suggestions=4
+summarize = false
 extra_instructions = ""
 rank_suggestions = false
 # params for '/improve --extended' mode
--- a/pr_agent/tools/pr_code_suggestions.py
+++ b/pr_agent/tools/pr_code_suggestions.py
@ -1,7 +1,6 @@
 import copy
 import textwrap
 from typing import Dict, List
-
 from jinja2 import Environment, StrictUndefined

 from pr_agent.algo.ai_handler import AiHandler
@ -55,9 +54,9 @@ class PRCodeSuggestions:
        try:
            get_logger().info('Generating code suggestions for PR...')
            if get_settings().config.publish_output:
-                self.git_provider.publish_comment("Preparing review...", is_temporary=True)
+                self.git_provider.publish_comment("Preparing suggestions...", is_temporary=True)

-            get_logger().info('Preparing PR review...')
+            get_logger().info('Preparing PR code suggestions...')
            if not self.is_extended:
                await retry_with_fallback_models(self._prepare_prediction)
                data = self._prepare_pr_code_suggestions()
@ -73,10 +72,14 @@ class PRCodeSuggestions:
                data['Code suggestions'] = await self.rank_suggestions(data['Code suggestions'])

            if get_settings().config.publish_output:
-                get_logger().info('Pushing PR review...')
+                get_logger().info('Pushing PR code suggestions...')
                self.git_provider.remove_initial_comment()
-                get_logger().info('Pushing inline code suggestions...')
-                self.push_inline_code_suggestions(data)
+                if get_settings().pr_code_suggestions.summarize:
+                    get_logger().info('Pushing summarize code suggestions...')
+                    self.publish_summarizes_suggestions(data)
+                else:
+                    get_logger().info('Pushing inline code suggestions...')
+                    self.push_inline_code_suggestions(data)
        except Exception as e:
            get_logger().error(f"Failed to generate code suggestions for PR, error: {e}")

@ -244,4 +247,27 @@ class PRCodeSuggestions:

        return data_sorted

+    def publish_summarizes_suggestions(self, data: Dict):
+        """
+        Publish all code suggestions as one markdown comment.
+
+        For every entry in data['Code suggestions'] the comment contains the suggestion
+        text, the file and line range (as a link when the git provider returns one), and
+        the existing/improved code. The code is wrapped in a collapsible <details>
+        section when the provider reports support for 'gfm_markdown'.
+
+        Args:
+            data: dictionary whose 'Code suggestions' value is a list of suggestion dicts
+                with the keys 'relevant file', 'relevant lines start', 'relevant lines end',
+                'suggestion content', 'existing code' and 'improved code'.
+
+        Any exception is caught and logged as an error; nothing is raised to the caller.
+        """
+        try:
+            # The capability does not depend on the suggestion, so query it only once.
+            gfm_supported = self.git_provider.is_supported("gfm_markdown")
+            parts = ["## PR Code Suggestions\n\n"]
+            for s in data['Code suggestions']:
+                code_snippet_link = self.git_provider.get_line_link(s['relevant file'], s['relevant lines start'],
+                                                                    s['relevant lines end'])
+                file_label = f"{s['relevant file']} ({s['relevant lines start']}-{s['relevant lines end']})"
+                parts.append(f"\n💡 Suggestion:\n\n**{s['suggestion content']}**\n\n")
+                if code_snippet_link:
+                    parts.append(f" File: [{file_label}]({code_snippet_link})\n\n")
+                else:
+                    parts.append(f"File: {file_label}\n\n")
+                if gfm_supported:
+                    parts.append("<details> <summary> Example code:</summary>\n\n")
+                    parts.append("___\n\n")
+                parts.append(f"Existing code:\n```{self.main_language}\n{s['existing code']}\n```\n")
+                parts.append(f"Improved code:\n```{self.main_language}\n{s['improved code']}\n```\n")
+                if gfm_supported:
+                    parts.append("</details>\n")
+                parts.append("\n___\n\n")
+            self.git_provider.publish_comment("".join(parts))
+        except Exception as e:
+            # A failed publish is an error, not routine information.
+            get_logger().error(f"Failed to publish summarized code suggestions, error: {e}")
+

--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@ -250,7 +250,7 @@ class PRReviewer:
        if not get_settings().get("CONFIG.CLI_MODE", False):
            markdown_text += "\n### How to use\n"
            if self.git_provider.is_supported("gfm_markdown"):
-                markdown_text += "\n**<details><summary> Instructions**</summary>\n"
+                markdown_text += "\n <details> <summary> Instructions</summary>\n\n"
            bot_user = "[bot]" if get_settings().github_app.override_deployment_type else get_settings().github_app.bot_user
            if user and bot_user not in user:
                markdown_text += bot_help_text(user)
--- a/tests/unittest/test_convert_to_markdown.py
+++ b/tests/unittest/test_convert_to_markdown.py
@ -71,7 +71,7 @@ class TestConvertToMarkdown:
 - 📌 **Type of PR:** Test type\n\
 - 🧪 **Relevant tests added:** no\n\
 - ✨ **Focused PR:** Yes\n\
-  **General PR suggestions:** general suggestion...\n\n\n- **<details><summary> 🤖 Code feedback:**</summary>\n\n  - **Code example:**\n    - **Before:**\n        ```\n        Code before\n        ```\n    - **After:**\n        ```\n        Code after\n        ```\n\n  - **Code example:**\n    - **Before:**\n        ```\n        Code before 2\n        ```\n    - **After:**\n        ```\n        Code after 2\n        ```\n\n</details>\
+-  **General PR suggestions:** general suggestion...\n\n\n- <details><summary> 🤖 Code feedback:</summary>\n\n  - **Code example:**\n    - **Before:**\n        ```\n        Code before\n        ```\n    - **After:**\n        ```\n        Code after\n        ```\n\n  - **Code example:**\n    - **Before:**\n        ```\n        Code before 2\n        ```\n    - **After:**\n        ```\n        Code after 2\n        ```\n\n</details>\
 """
        assert convert_to_markdown(input_data).strip() == expected_output.strip()

--- a/tests/unittest/test_parse_code_suggestion.py
+++ b/tests/unittest/test_parse_code_suggestion.py
@ -61,7 +61,7 @@ class TestParseCodeSuggestion:
            'before': 'Before 1',
            'after': 'After 1'
        }
-        expected_output = "   **suggestion:** Suggestion 1\n   **description:** Description 1\n   **before:** Before 1\n   **after:** After 1\n\n"  # noqa: E501
+        expected_output = '   **suggestion:** Suggestion 1  \n   **description:** Description 1  \n   **before:** Before 1  \n   **after:** After 1  \n\n'  # noqa: E501
        assert parse_code_suggestion(code_suggestions) == expected_output

    # Tests that function returns correct output when input dictionary has 'code example' key
@ -74,5 +74,5 @@ class TestParseCodeSuggestion:
                'after': 'After 2'
            }
        }
-        expected_output = "   **suggestion:** Suggestion 2\n   **description:** Description 2\n  - **code example:**\n    - **before:**\n        ```\n        Before 2\n        ```\n    - **after:**\n        ```\n        After 2\n        ```\n\n"  # noqa: E501
+        expected_output = '   **suggestion:** Suggestion 2  \n   **description:** Description 2  \n  - **code example:**\n    - **before:**\n        ```\n        Before 2\n        ```\n    - **after:**\n        ```\n        After 2\n        ```\n\n'  # noqa: E501
        assert parse_code_suggestion(code_suggestions) == expected_output