diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 4d09b6e7..1259a46e 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -276,7 +276,7 @@ def _fix_key_value(key: str, value: str): def load_yaml(review_text: str) -> dict: review_text = review_text.removeprefix('```yaml').rstrip('`') try: - data = yaml.load(review_text, Loader=yaml.SafeLoader) + data = yaml.safe_load(review_text) except Exception as e: logging.error(f"Failed to parse AI prediction: {e}") data = try_fix_yaml(review_text) diff --git a/pr_agent/settings/pr_code_suggestions_prompts.toml b/pr_agent/settings/pr_code_suggestions_prompts.toml index 4e4b57e5..f60b9cc2 100644 --- a/pr_agent/settings/pr_code_suggestions_prompts.toml +++ b/pr_agent/settings/pr_code_suggestions_prompts.toml @@ -1,8 +1,8 @@ [pr_code_suggestions_prompt] -system="""You are a language model called PR-Code-Reviewer. -Your task is to provide meaningful actionable code suggestions, to improve the new code presented in a PR. +system="""You are a language model called PR-Code-Reviewer that specializes in suggesting code improvements for a Pull Request (PR). +Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR. -Example PR Diff input: +Example for a PR Diff input: ' ## src/file1.py @@ -10,8 +10,8 @@ Example PR Diff input: __new hunk__ 12 code line that already existed in the file... 13 code line that already existed in the file.... -14 +new code line added in the PR -15 code line that already existed in the file... +14 +new code line1 added in the PR +15 +new code line2 added in the PR 16 code line that already existed in the file... __old hunk__ code line that already existed in the file... @@ -31,13 +31,17 @@ __old hunk__ ' Specific instructions: -- Focus on important suggestions like fixing code problems, issues and bugs. 
As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices. -- Suggestions should refer only to code from the '__new hunk__' sections, and focus on new lines of code (lines starting with '+'). -- Provide the exact line number range (inclusive) for each issue. -- Assume there is additional relevant code, that is not included in the diff. - Provide up to {{ num_code_suggestions }} code suggestions. -- Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the '__new hunk__' code. -- Don't suggest to add docstring or type hints. +- Prioritize suggestions that address major problems, issues and bugs in the code. + As a second priority, suggestions should focus on best practices, code readability, maintainability, enhancements, performance, and other aspects. + Don't suggest to add docstring or type hints. + Try to provide diverse and insightful suggestions. +- Suggestions should refer only to code from the '__new hunk__' sections, and focus on new lines of code (lines starting with '+'). + Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the '__new hunk__' code. + For each suggestion, make sure to also take the context into consideration, meaning the lines before and after the relevant code. +- Provide the exact line number range (inclusive) for each issue. +- Assume there is additional relevant code, that is not included in the diff. 
+ {%- if extra_instructions %} @@ -45,63 +49,76 @@ Extra instructions from the user: {{ extra_instructions }} {%- endif %} -You must use the following JSON schema to format your answer: -```json -{ - "Code suggestions": { - "type": "array", - "minItems": 1, - "maxItems": {{ num_code_suggestions }}, - "uniqueItems": "true", - "items": { - "relevant file": { - "type": "string", - "description": "the relevant file full path" - }, - "suggestion content": { - "type": "string", - "description": "a concrete suggestion for meaningfully improving the new PR code (lines from the '__new hunk__' sections, starting with '+')." - }, - "existing code": { - "type": "string", - "description": "a code snippet showing the relevant code lines from a '__new hunk__' section. It must be continuous, correctly formatted and indented, and without line numbers." - }, - "relevant lines": { - "type": "string", - "description": "the relevant lines from a '__new hunk__' section, in the format of 'start_line-end_line'. For example: '10-15'. They should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above." - }, - "improved code": { - "type": "string", - "description": "a new code snippet that can be used to replace the relevant lines in '__new hunk__' code. Replacement suggestions should be complete, correctly formatted and indented, and without line numbers." - } - } - } -} +You must use the following YAML schema to format your answer: +```yaml +Code suggestions: + type: array + minItems: 1 + maxItems: {{ num_code_suggestions }} + uniqueItems: true + items: + relevant file: + type: string + description: the relevant file full path + suggestion content: + type: string + description: |- + a concrete suggestion for meaningfully improving the new PR code. + existing code: + type: string + description: |- + a code snippet showing the relevant code lines from a '__new hunk__' section. 
+ It must be continuous, correctly formatted and indented, and without line numbers. + relevant lines: + type: string + description: |- + the relevant lines from a '__new hunk__' section, in the format of 'start_line-end_line'. + For example: '10-15'. They should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above. + improved code: + type: string + description: |- + a new code snippet that can be used to replace the relevant lines in '__new hunk__' code. + Replacement suggestions should be complete, correctly formatted and indented, and without line numbers. ``` -Don't output line numbers in the 'improved code' snippets. +Example output: +```yaml +Code suggestions: + - relevant file: |- + src/file1.py + suggestion content: |- + Rename func1() to a name that describes what it does + existing code: |- + def func1(): + relevant lines: '12-12' + improved code: |- + ... +``` + + +Each YAML output MUST be after a newline, indented, with block scalar indicator ('|-'). Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields. 
""" user="""PR Info: -Title: '{{title}}' -Branch: '{{branch}}' -Description: '{{description}}' -{%- if language %} -Main language: {{language}} -{%- endif %} -{%- if commit_messages_str %} -Commit messages: -{{commit_messages_str}} +Title: '{{title}}' + +Branch: '{{branch}}' + +Description: '{{description}}' + +{%- if language %} + +Main language: {{language}} {%- endif %} The PR Diff: ``` -{{diff}} +{{- diff|trim }} ``` -Response (should be a valid JSON, and nothing else): -```json +Response (should be a valid YAML, and nothing else): +```yaml """ diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py index cc787f5e..d9fb3051 100644 --- a/pr_agent/tools/pr_code_suggestions.py +++ b/pr_agent/tools/pr_code_suggestions.py @@ -1,16 +1,13 @@ import copy -import json import logging import textwrap -from typing import List - -import yaml +from typing import List, Dict from jinja2 import Environment, StrictUndefined from pr_agent.algo.ai_handler import AiHandler from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models, get_pr_multi_diffs from pr_agent.algo.token_handler import TokenHandler -from pr_agent.algo.utils import try_fix_json +from pr_agent.algo.utils import load_yaml from pr_agent.config_loader import get_settings from pr_agent.git_providers import BitbucketProvider, get_git_provider from pr_agent.git_providers.git_provider import get_main_pr_language @@ -98,14 +95,11 @@ class PRCodeSuggestions: return response - def _prepare_pr_code_suggestions(self) -> str: + def _prepare_pr_code_suggestions(self) -> Dict: review = self.prediction.strip() - try: - data = json.loads(review) - except json.decoder.JSONDecodeError: - if get_settings().config.verbosity_level >= 2: - logging.info(f"Could not parse json response: {review}") - data = try_fix_json(review, code_suggestions=True) + data = load_yaml(review) + if isinstance(data, list): + data = {'Code suggestions': data} return data def 
push_inline_code_suggestions(self, data): @@ -227,7 +221,7 @@ class PRCodeSuggestions: response, finish_reason = await self.ai_handler.chat_completion(model=model, system=system_prompt, user=user_prompt) - sort_order = yaml.safe_load(response) + sort_order = load_yaml(response) for s in sort_order['Sort Order']: suggestion_number = s['suggestion number'] importance_order = s['importance order']