Merge pull request #1011 from Codium-ai/tr/review_redesign

Tr/review redesign
2025-07-21 04:50:39 +08:00 · 2024-07-03 08:38:27 +03:00
parent 96ededd12a 040503039e
commit f0efe4a707
4 changed files with 142 additions and 72 deletions
--- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py
+++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py
@ -99,7 +99,7 @@ class LiteLLMAIHandler(BaseAiHandler):
        return get_settings().get("OPENAI.DEPLOYMENT_ID", None)

    @retry(
-        retry=retry_if_exception_type((openai.APIError, openai.APIConnectionError, openai.Timeout)), # No retry on RateLimitError
+        retry=retry_if_exception_type((openai.APIError, openai.APIConnectionError, openai.APITimeoutError)), # No retry on RateLimitError
        stop=stop_after_attempt(OPENAI_RETRIES)
    )
    async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2, img_path: str = None):
@ -143,7 +143,7 @@ class LiteLLMAIHandler(BaseAiHandler):
                get_logger().info(f"\nUser prompt:\n{user}")

            response = await acompletion(**kwargs)
-        except (openai.APIError, openai.Timeout) as e:
+        except (openai.APIError, openai.APITimeoutError) as e:
            get_logger().error("Error during OpenAI inference: ", e)
            raise
        except (openai.RateLimitError) as e:
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@ -37,7 +37,7 @@ def get_setting(key: str) -> Any:
        return global_settings.get(key, None)


-def emphasize_header(text: str) -> str:
+def emphasize_header(text: str, only_markdown=False) -> str:
    try:
        # Finding the position of the first occurrence of ": "
        colon_position = text.find(": ")
@ -45,6 +45,9 @@ def emphasize_header(text: str) -> str:
        # Splitting the string and wrapping the first part in <strong> tags
        if colon_position != -1:
            # Everything before the colon (inclusive) is wrapped in <strong> tags
+            if only_markdown:
+                transformed_string = f"**{text[:colon_position + 1]}**\n" + text[colon_position + 1:]
+            else:
                transformed_string = "<strong>" + text[:colon_position + 1] + "</strong>" +'<br>' + text[colon_position + 1:]
        else:
            # If there's no ": ", return the original string
@ -67,8 +70,7 @@ def unique_strings(input_list: List[str]) -> List[str]:
            seen.add(item)
    return unique_list

-
-def convert_to_markdown(output_data: dict, gfm_supported: bool = True, incremental_review=None) -> str:
+def convert_to_markdown_v2(output_data: dict, gfm_supported: bool = True, incremental_review=None) -> str:
    """
    Convert a dictionary of data into markdown format.
    Args:
@ -96,12 +98,11 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
    else:
        markdown_text += f"{PRReviewHeader.INCREMENTAL.value} 🔍\n\n"
        markdown_text += f"⏮️ Review for commits since previous PR-Agent review {incremental_review}.\n\n"
+    # if not output_data or not output_data.get('review', {}):
+    #     return ""
+
    if gfm_supported:
        markdown_text += "<table>\n"
-        # markdown_text += """<td> &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Feedback&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;</td> <td></td></tr>"""
-
-    if not output_data or not output_data.get('review', {}):
-        return ""

    for key, value in output_data['review'].items():
        if value is None or value == '' or value == {} or value == []:
@ -109,43 +110,87 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
                continue
        key_nice = key.replace('_', ' ').capitalize()
        emoji = emojis.get(key_nice, "")
-        if gfm_supported:
        if 'Estimated effort to review' in key_nice:
-                key_nice = 'Estimated&nbsp;effort&nbsp;to&nbsp;review [1-5]'
-            if 'security concerns' in key_nice.lower():
+            key_nice = 'Estimated effort to review'
+            value_int = int(value)
+            blue_bars = '🔵' * value_int
+            white_bars = '⚪' * (5 - value_int)
+            value = f"{value.strip()} {blue_bars}{white_bars}"
+            if gfm_supported:
+                markdown_text += f"<tr><td>"
+                markdown_text += f"{emoji}&nbsp;<strong>{key_nice}</strong>: {value}"
+                markdown_text += f"</td></tr>\n"
+            else:
+                markdown_text += f"### {emoji} {key_nice}: {value}\n\n"
+        elif 'relevant tests' in key_nice.lower():
+            value = value.strip().lower()
+            if gfm_supported:
+                markdown_text += f"<tr><td>"
+                if is_value_no(value):
+                    markdown_text += f"{emoji}&nbsp;<strong>No relevant tests</strong>"
+                else:
+                    markdown_text += f"{emoji}&nbsp;<strong>PR contains tests</strong>"
+                markdown_text += f"</td></tr>\n"
+            else:
+                if gfm_supported:
+                    markdown_text += f"<tr><td>"
+                    if is_value_no(value):
+                        markdown_text += f"{emoji}&nbsp;<strong>No relevant tests</strong>"
+                    else:
+                        markdown_text += f"{emoji}&nbsp;<strong>PR contains tests</strong>"
+                else:
+                    if is_value_no(value):
+                        markdown_text += f'### {emoji} No relevant tests\n\n'
+                    else:
+                        markdown_text += f"### PR contains tests\n\n"
+        elif 'security concerns' in key_nice.lower():
+            if gfm_supported:
+                markdown_text += f"<tr><td>"
+                if is_value_no(value):
+                    markdown_text += f"{emoji}&nbsp;<strong>No security concerns identified</strong>"
+                else:
+                    markdown_text += f"{emoji}&nbsp;<strong>Security concerns</strong><br><br>\n\n"
                    value = emphasize_header(value.strip())
-                markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n{value}\n\n</td></tr>\n"
+                    markdown_text += f"{value}"
+                markdown_text += f"</td></tr>\n"
+            else:
+                if is_value_no(value):
+                    markdown_text += f'### {emoji} No security concerns identified\n\n'
+                else:
+                    markdown_text += f"### {emoji} Security concerns\n\n"
+                    value = emphasize_header(value.strip())
+                    markdown_text += f"{value}\n\n"
        elif 'can be split' in key_nice.lower():
+            if gfm_supported:
+                markdown_text += f"<tr><td>"
                markdown_text += process_can_be_split(emoji, value)
+                markdown_text += f"</td></tr>\n"
        elif 'key issues to review' in key_nice.lower():
            value = value.strip()
            issues = value.split('\n- ')
            for i, _ in enumerate(issues):
                issues[i] = issues[i].strip().strip('-').strip()
            issues = unique_strings(issues) # remove duplicates
-                number_of_issues = len(issues)
-                if number_of_issues > 1:
-                    markdown_text += f"<tr><td rowspan={number_of_issues}> {emoji}&nbsp;<strong>{key_nice}</strong></td>\n"
+            if gfm_supported:
+                markdown_text += f"<tr><td>"
+                markdown_text += f"{emoji}&nbsp;<strong>{key_nice}</strong><br><br>\n\n"
+            else:
+                markdown_text += f"### {emoji} Key issues to review:\n\n"
            for i, issue in enumerate(issues):
                if not issue:
                    continue
-                        issue = emphasize_header(issue)
-                        issue = replace_code_tags(issue)
-                        if i == 0:
-                            markdown_text += f"<td>\n{issue}</td></tr>\n"
+                issue = emphasize_header(issue, only_markdown=True)
+                markdown_text += f"{issue}\n\n"
+            if gfm_supported:
+                markdown_text += f"</td></tr>\n"
        else:
-                            markdown_text += f"<tr>\n<td>\n{issue}</td></tr>\n"
+            if gfm_supported:
+                markdown_text += f"<tr><td>"
+                markdown_text += f"{emoji}&nbsp;<strong>{key_nice}</strong>: {value}"
+                markdown_text += f"</td></tr>\n"
            else:
-                    value = emphasize_header(value.strip('-').strip())
-                    value = replace_code_tags(value)
-                    markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n{value}\n\n</td></tr>\n"
-            else:
-                markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n{value}\n\n</td></tr>\n"
-        else:
-            if len(value.split()) > 1:
-                markdown_text += f"{emoji} **{key_nice}:**\n\n {value}\n\n"
-            else:
-                markdown_text += f"{emoji} **{key_nice}:** {value}\n\n"
+                markdown_text += f"### {emoji} {key_nice}: {value}\n\n"
+
    if gfm_supported:
        markdown_text += "</table>\n"

@ -155,7 +200,7 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
            markdown_text += f"<details><summary> <strong>Code feedback:</strong></summary>\n\n"
            markdown_text += "<hr>"
        else:
-            markdown_text += f"\n\n** Code feedback:**\n\n"
+            markdown_text += f"\n\n### Code feedback:\n\n"
        for i, value in enumerate(output_data['code_feedback']):
            if value is None or value == '' or value == {} or value == []:
                continue
@ -164,8 +209,6 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment
            markdown_text= markdown_text[:-4]
        if gfm_supported:
            markdown_text += f"</details>"
-    #print(markdown_text)
-

    return markdown_text

@ -177,29 +220,47 @@ def process_can_be_split(emoji, value):
        markdown_text = ""
        if not value or isinstance(value, list) and len(value) == 1:
            value = "No"
-            markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
+            # markdown_text += f"<tr><td> {emoji}&nbsp;<strong>{key_nice}</strong></td><td>\n\n{value}\n\n</td></tr>\n"
+            # markdown_text += f"### {emoji} No multiple PR themes\n\n"
+            markdown_text += f"{emoji} <strong>No multiple PR themes</strong>\n\n"
        else:
-            number_of_splits = len(value)
-            markdown_text += f"<tr><td rowspan={number_of_splits}> {emoji}&nbsp;<strong>{key_nice}</strong></td>\n"
+            markdown_text += f"{emoji} <strong>{key_nice}</strong><br><br>\n\n"
            for i, split in enumerate(value):
                title = split.get('title', '')
                relevant_files = split.get('relevant_files', [])
-                if i == 0:
-                    markdown_text += f"<td><details><summary>\nSub-PR theme:<br><strong>{title}</strong></summary>\n\n"
-                    markdown_text += f"<hr>\n"
-                    markdown_text += f"Relevant files:\n"
-                    markdown_text += f"<ul>\n"
+                markdown_text += f"<details><summary>\nSub-PR theme: <b>{title}</b></summary>\n\n"
+                markdown_text += f"___\n\nRelevant files:\n\n"
                for file in relevant_files:
-                        markdown_text += f"<li>{file}</li>\n"
-                    markdown_text += f"</ul>\n\n</details></td></tr>\n"
-                else:
-                    markdown_text += f"<tr>\n<td><details><summary>\nSub-PR theme:<br><strong>{title}</strong></summary>\n\n"
-                    markdown_text += f"<hr>\n"
-                    markdown_text += f"Relevant files:\n"
-                    markdown_text += f"<ul>\n"
-                    for file in relevant_files:
-                        markdown_text += f"<li>{file}</li>\n"
-                    markdown_text += f"</ul>\n\n</details></td></tr>\n"
+                    markdown_text += f"- {file}\n"
+                markdown_text += f"___\n\n"
+                markdown_text += f"</details>\n\n"
+
+                # markdown_text += f"#### Sub-PR theme: {title}\n\n"
+                # markdown_text += f"Relevant files:\n\n"
+                # for file in relevant_files:
+                #     markdown_text += f"- {file}\n"
+                # markdown_text += "\n"
+            # number_of_splits = len(value)
+            # markdown_text += f"<tr><td rowspan={number_of_splits}> {emoji}&nbsp;<strong>{key_nice}</strong></td>\n"
+            # for i, split in enumerate(value):
+            #     title = split.get('title', '')
+            #     relevant_files = split.get('relevant_files', [])
+            #     if i == 0:
+            #         markdown_text += f"<td><details><summary>\nSub-PR theme:<br><strong>{title}</strong></summary>\n\n"
+            #         markdown_text += f"<hr>\n"
+            #         markdown_text += f"Relevant files:\n"
+            #         markdown_text += f"<ul>\n"
+            #         for file in relevant_files:
+            #             markdown_text += f"<li>{file}</li>\n"
+            #         markdown_text += f"</ul>\n\n</details></td></tr>\n"
+            #     else:
+            #         markdown_text += f"<tr>\n<td><details><summary>\nSub-PR theme:<br><strong>{title}</strong></summary>\n\n"
+            #         markdown_text += f"<hr>\n"
+            #         markdown_text += f"Relevant files:\n"
+            #         markdown_text += f"<ul>\n"
+            #         for file in relevant_files:
+            #             markdown_text += f"<li>{file}</li>\n"
+            #         markdown_text += f"</ul>\n\n</details></td></tr>\n"
    except Exception as e:
        get_logger().exception(f"Failed to process can be split: {e}")
        return ""
@ -778,3 +839,11 @@ def show_relevant_configurations(relevant_section: str) -> str:
    markdown_text += "\n```"
    markdown_text += "\n</details>\n"
    return markdown_text
+
+def is_value_no(value):
+    if value is None:
+        return True
+    value_str = str(value).strip().lower()
+    if value_str == 'no' or value_str == 'none' or value_str == 'false':
+        return True
+    return False
--- a/pr_agent/settings/pr_code_suggestions_prompts.toml
+++ b/pr_agent/settings/pr_code_suggestions_prompts.toml
@ -27,14 +27,15 @@ __old hunk__
 ## file: 'src/file2.py'
 ...
 ======
- In this format, we separated each hunk of code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code that was removed.
- Code lines are prefixed symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code.
+- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed.
 - We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference.
+- Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \
+Suggestions should always focus on ways to improve the new code lines introduced in the PR, meaning lines in the '__new hunk__' sections that begin with a '+' symbol (after the line numbers). The '__old hunk__' sections code is for context and reference only.


 Specific instructions for generating code suggestions:
 - Provide up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful.
- The suggestions should focus on ways to improve the new code in the PR, meaning focusing on lines from '__new hunk__' sections, starting with '+'. Use the '__old hunk__' sections to understand the context of the code changes.
+- The suggestions should focus on improving the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers).
 - Prioritize suggestions that address possible issues, major problems, and bugs in the PR code.
 - Don't suggest to add docstring, type hints, or comments, or to remove unused imports.
 - Suggestions should not repeat code already present in the '__new hunk__' sections.
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@ -8,8 +8,8 @@ from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler
 from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler
 from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models
 from pr_agent.algo.token_handler import TokenHandler
-from pr_agent.algo.utils import PRReviewHeader, convert_to_markdown, github_action_output, load_yaml, ModelType, \
-    show_relevant_configurations
+from pr_agent.algo.utils import github_action_output, load_yaml, ModelType, \
+    show_relevant_configurations, convert_to_markdown_v2, PRReviewHeader
 from pr_agent.config_loader import get_settings
 from pr_agent.git_providers import get_git_provider, get_git_provider_with_context
 from pr_agent.git_providers.git_provider import IncrementalPR, get_main_pr_language
@ -230,7 +230,7 @@ class PRReviewer:
                              f"{self.git_provider.incremental.first_new_commit_sha}"
            incremental_review_markdown_text = f"Starting from commit {last_commit_url}"

-        markdown_text = convert_to_markdown(data, self.git_provider.is_supported("gfm_markdown"),
+        markdown_text = convert_to_markdown_v2(data, self.git_provider.is_supported("gfm_markdown"),
                                            incremental_review_markdown_text)

        # Add help text if gfm_markdown is supported