diff --git a/pr_agent/algo/ai_handlers/litellm_ai_handler.py b/pr_agent/algo/ai_handlers/litellm_ai_handler.py index 0e5501d3..5d0929b5 100644 --- a/pr_agent/algo/ai_handlers/litellm_ai_handler.py +++ b/pr_agent/algo/ai_handlers/litellm_ai_handler.py @@ -99,7 +99,7 @@ class LiteLLMAIHandler(BaseAiHandler): return get_settings().get("OPENAI.DEPLOYMENT_ID", None) @retry( - retry=retry_if_exception_type((openai.APIError, openai.APIConnectionError, openai.Timeout)), # No retry on RateLimitError + retry=retry_if_exception_type((openai.APIError, openai.APIConnectionError, openai.APITimeoutError)), # No retry on RateLimitError stop=stop_after_attempt(OPENAI_RETRIES) ) async def chat_completion(self, model: str, system: str, user: str, temperature: float = 0.2, img_path: str = None): @@ -143,7 +143,7 @@ class LiteLLMAIHandler(BaseAiHandler): get_logger().info(f"\nUser prompt:\n{user}") response = await acompletion(**kwargs) - except (openai.APIError, openai.Timeout) as e: + except (openai.APIError, openai.APITimeoutError) as e: get_logger().error("Error during OpenAI inference: ", e) raise except (openai.RateLimitError) as e: diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 4eb62e91..c0955dea 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -37,7 +37,7 @@ def get_setting(key: str) -> Any: return global_settings.get(key, None) -def emphasize_header(text: str) -> str: +def emphasize_header(text: str, only_markdown=False) -> str: try: # Finding the position of the first occurrence of ": " colon_position = text.find(": ") @@ -45,7 +45,10 @@ def emphasize_header(text: str) -> str: # Splitting the string and wrapping the first part in tags if colon_position != -1: # Everything before the colon (inclusive) is wrapped in tags - transformed_string = "" + text[:colon_position + 1] + "" +'
' + text[colon_position + 1:] + if only_markdown: + transformed_string = f"**{text[:colon_position + 1]}**\n" + text[colon_position + 1:] + else: + transformed_string = "" + text[:colon_position + 1] + "" +'
' + text[colon_position + 1:] else: # If there's no ": ", return the original string transformed_string = text @@ -67,8 +70,7 @@ def unique_strings(input_list: List[str]) -> List[str]: seen.add(item) return unique_list - -def convert_to_markdown(output_data: dict, gfm_supported: bool = True, incremental_review=None) -> str: +def convert_to_markdown_v2(output_data: dict, gfm_supported: bool = True, incremental_review=None) -> str: """ Convert a dictionary of data into markdown format. Args: @@ -96,12 +98,11 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment else: markdown_text += f"{PRReviewHeader.INCREMENTAL.value} 🔍\n\n" markdown_text += f"⏮️ Review for commits since previous PR-Agent review {incremental_review}.\n\n" + # if not output_data or not output_data.get('review', {}): + # return "" + if gfm_supported: markdown_text += "\n" - # markdown_text += """""" - - if not output_data or not output_data.get('review', {}): - return "" for key, value in output_data['review'].items(): if value is None or value == '' or value == {} or value == []: @@ -109,43 +110,87 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment continue key_nice = key.replace('_', ' ').capitalize() emoji = emojis.get(key_nice, "") - if gfm_supported: - if 'Estimated effort to review' in key_nice: - key_nice = 'Estimated effort to review [1-5]' - if 'security concerns' in key_nice.lower(): - value = emphasize_header(value.strip()) - markdown_text += f"\n" - elif 'can be split' in key_nice.lower(): - markdown_text += process_can_be_split(emoji, value) - elif 'key issues to review' in key_nice.lower(): - value = value.strip() - issues = value.split('\n- ') - for i, _ in enumerate(issues): - issues[i] = issues[i].strip().strip('-').strip() - issues = unique_strings(issues) # remove duplicates - number_of_issues = len(issues) - if number_of_issues > 1: - markdown_text += f"\n" - for i, issue in enumerate(issues): - if not issue: - continue - issue = emphasize_header(issue) - issue = replace_code_tags(issue) - if i == 0: - markdown_text += f"\n" - else: - markdown_text += f"\n\n" + if 'Estimated effort to review' in key_nice: + key_nice = 'Estimated effort to review' + value_int = int(value) + blue_bars = '🔵' * value_int + white_bars = '⚪' * (5 - value_int) + value = f"{value.strip()} {blue_bars}{white_bars}" + if gfm_supported: + markdown_text += f"\n" + else: + markdown_text += f"### {emoji} {key_nice}: {value}\n\n" + elif 'relevant tests' in key_nice.lower(): + value = value.strip().lower() + if gfm_supported: + markdown_text += f"\n" + markdown_text += f"{emoji} PR contains tests" + markdown_text += f"\n" else: - markdown_text += f"\n" + if gfm_supported: + markdown_text += f"\n" + else: + if is_value_no(value): + markdown_text += f'### {emoji} No security concerns identified\n\n' + else: + markdown_text += f"### {emoji} Security concerns\n\n" + value = emphasize_header(value.strip()) + markdown_text += f"{value}\n\n" + elif 'can be split' in key_nice.lower(): + if gfm_supported: + markdown_text += f"\n" + elif 'key issues to review' in key_nice.lower(): + value = value.strip() + issues = value.split('\n- ') + for i, _ in enumerate(issues): + issues[i] = issues[i].strip().strip('-').strip() + issues = unique_strings(issues) # remove duplicates + if gfm_supported: + markdown_text += f"\n" else: - if len(value.split()) > 1: - markdown_text += f"{emoji} **{key_nice}:**\n\n {value}\n\n" + if gfm_supported: + markdown_text += f"\n" else: - markdown_text += f"{emoji} **{key_nice}:** {value}\n\n" + markdown_text += f"### {emoji} {key_nice}: {value}\n\n" + if gfm_supported: markdown_text += "
      Feedback            
{emoji} {key_nice}\n{value}\n\n
{emoji} {key_nice}\n{issue}
\n{issue}
" + markdown_text += f"{emoji} {key_nice}: {value}" + markdown_text += f"
" + if is_value_no(value): + markdown_text += f"{emoji} No relevant tests" else: - value = emphasize_header(value.strip('-').strip()) - value = replace_code_tags(value) - markdown_text += f"
{emoji} {key_nice}\n{value}\n\n
{emoji} {key_nice}\n{value}\n\n
" + if is_value_no(value): + markdown_text += f"{emoji} No relevant tests" + else: + markdown_text += f"{emoji} PR contains tests" + else: + if is_value_no(value): + markdown_text += f'### {emoji} No relevant tests\n\n' + else: + markdown_text += f"### PR contains tests\n\n" + elif 'security concerns' in key_nice.lower(): + if gfm_supported: + markdown_text += f"
" + if is_value_no(value): + markdown_text += f"{emoji} No security concerns identified" + else: + markdown_text += f"{emoji} Security concerns

\n\n" + value = emphasize_header(value.strip()) + markdown_text += f"{value}" + markdown_text += f"
" + markdown_text += process_can_be_split(emoji, value) + markdown_text += f"
" + markdown_text += f"{emoji} {key_nice}

\n\n" + else: + markdown_text += f"### {emoji} Key issues to review:\n\n" + for i, issue in enumerate(issues): + if not issue: + continue + issue = emphasize_header(issue, only_markdown=True) + markdown_text += f"{issue}\n\n" + if gfm_supported: + markdown_text += f"
" + markdown_text += f"{emoji} {key_nice}: {value}" + markdown_text += f"
\n" @@ -155,17 +200,15 @@ def convert_to_markdown(output_data: dict, gfm_supported: bool = True, increment markdown_text += f"
Code feedback:\n\n" markdown_text += "
" else: - markdown_text += f"\n\n** Code feedback:**\n\n" + markdown_text += f"\n\n### Code feedback:\n\n" for i, value in enumerate(output_data['code_feedback']): if value is None or value == '' or value == {} or value == []: continue markdown_text += parse_code_suggestion(value, i, gfm_supported)+"\n\n" if markdown_text.endswith('
'): - markdown_text = markdown_text[:-4] + markdown_text= markdown_text[:-4] if gfm_supported: markdown_text += f"
" - #print(markdown_text) - return markdown_text @@ -177,29 +220,47 @@ def process_can_be_split(emoji, value): markdown_text = "" if not value or isinstance(value, list) and len(value) == 1: value = "No" - markdown_text += f" {emoji} {key_nice}\n\n{value}\n\n\n" + # markdown_text += f" {emoji} {key_nice}\n\n{value}\n\n\n" + # markdown_text += f"### {emoji} No multiple PR themes\n\n" + markdown_text += f"{emoji} No multiple PR themes\n\n" else: - number_of_splits = len(value) - markdown_text += f" {emoji} {key_nice}\n" + markdown_text += f"{emoji} {key_nice}

\n\n" for i, split in enumerate(value): title = split.get('title', '') relevant_files = split.get('relevant_files', []) - if i == 0: - markdown_text += f"
\nSub-PR theme:
{title}
\n\n" - markdown_text += f"
\n" - markdown_text += f"Relevant files:\n" - markdown_text += f"
    \n" - for file in relevant_files: - markdown_text += f"
  • {file}
  • \n" - markdown_text += f"
\n\n
\n" - else: - markdown_text += f"\n
\nSub-PR theme:
{title}
\n\n" - markdown_text += f"
\n" - markdown_text += f"Relevant files:\n" - markdown_text += f"
    \n" - for file in relevant_files: - markdown_text += f"
  • {file}
  • \n" - markdown_text += f"
\n\n
\n" + markdown_text += f"
\nSub-PR theme: {title}\n\n" + markdown_text += f"___\n\nRelevant files:\n\n" + for file in relevant_files: + markdown_text += f"- {file}\n" + markdown_text += f"___\n\n" + markdown_text += f"
\n\n" + + # markdown_text += f"#### Sub-PR theme: {title}\n\n" + # markdown_text += f"Relevant files:\n\n" + # for file in relevant_files: + # markdown_text += f"- {file}\n" + # markdown_text += "\n" + # number_of_splits = len(value) + # markdown_text += f" {emoji} {key_nice}\n" + # for i, split in enumerate(value): + # title = split.get('title', '') + # relevant_files = split.get('relevant_files', []) + # if i == 0: + # markdown_text += f"
\nSub-PR theme:
{title}
\n\n" + # markdown_text += f"
\n" + # markdown_text += f"Relevant files:\n" + # markdown_text += f"
    \n" + # for file in relevant_files: + # markdown_text += f"
  • {file}
  • \n" + # markdown_text += f"
\n\n
\n" + # else: + # markdown_text += f"\n
\nSub-PR theme:
{title}
\n\n" + # markdown_text += f"
\n" + # markdown_text += f"Relevant files:\n" + # markdown_text += f"
    \n" + # for file in relevant_files: + # markdown_text += f"
  • {file}
  • \n" + # markdown_text += f"
\n\n
\n" except Exception as e: get_logger().exception(f"Failed to process can be split: {e}") return "" @@ -778,3 +839,11 @@ def show_relevant_configurations(relevant_section: str) -> str: markdown_text += "\n```" markdown_text += "\n\n" return markdown_text + +def is_value_no(value): + if value is None: + return True + value_str = str(value).strip().lower() + if value_str == 'no' or value_str == 'none' or value_str == 'false': + return True + return False diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml index ab20a341..435ea91b 100644 --- a/pr_agent/settings/pr_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_code_suggestions_prompts.toml @@ -27,14 +27,15 @@ __old hunk__ ## file: 'src/file2.py' ... ====== -- In this format, we separated each hunk of code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code that was removed. -- Code lines are prefixed symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. +- In this format, we separated each hunk of diff code to '__new hunk__' and '__old hunk__' sections. The '__new hunk__' section contains the new code of the chunk, and the '__old hunk__' section contains the old code, that was removed. - We also added line numbers for the '__new hunk__' sections, to help you refer to the code lines in your suggestions. These line numbers are not part of the actual code, and are only used for reference. +- Code lines are prefixed with symbols ('+', '-', ' '). The '+' symbol indicates new code added in the PR, the '-' symbol indicates code removed in the PR, and the ' ' symbol indicates unchanged code. \ +Suggestions should always focus on ways to improve the new code lines introduced in the PR, meaning lines in the '__new hunk__' sections that begin with a '+' symbol (after the line numbers). The '__old hunk__' sections code is for context and reference only. Specific instructions for generating code suggestions: - Provide up to {{ num_code_suggestions }} code suggestions. The suggestions should be diverse and insightful. -- The suggestions should focus on ways to improve the new code in the PR, meaning focusing on lines from '__new hunk__' sections, starting with '+'. Use the '__old hunk__' sections to understand the context of the code changes. +- The suggestions should focus on improving the new code introduced the PR, meaning lines from '__new hunk__' sections, starting with '+' (after the line numbers). - Prioritize suggestions that address possible issues, major problems, and bugs in the PR code. - Don't suggest to add docstring, type hints, or comments, or to remove unused imports. - Suggestions should not repeat code already present in the '__new hunk__' sections. diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index 0487a229..c3276033 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -8,8 +8,8 @@ from pr_agent.algo.ai_handlers.base_ai_handler import BaseAiHandler from pr_agent.algo.ai_handlers.litellm_ai_handler import LiteLLMAIHandler from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models from pr_agent.algo.token_handler import TokenHandler -from pr_agent.algo.utils import PRReviewHeader, convert_to_markdown, github_action_output, load_yaml, ModelType, \ - show_relevant_configurations +from pr_agent.algo.utils import github_action_output, load_yaml, ModelType, \ + show_relevant_configurations, convert_to_markdown_v2, PRReviewHeader from pr_agent.config_loader import get_settings from pr_agent.git_providers import get_git_provider, get_git_provider_with_context from pr_agent.git_providers.git_provider import IncrementalPR, get_main_pr_language @@ -230,7 +230,7 @@ class PRReviewer: f"{self.git_provider.incremental.first_new_commit_sha}" incremental_review_markdown_text = f"Starting from commit {last_commit_url}" - markdown_text = convert_to_markdown(data, self.git_provider.is_supported("gfm_markdown"), + markdown_text = convert_to_markdown_v2(data, self.git_provider.is_supported("gfm_markdown"), incremental_review_markdown_text) # Add help text if gfm_markdown is supported