yaml

2025-07-21 04:50:39 +08:00 · 2023-08-28 09:48:43 +03:00
parent 33ef23289f
commit 2dc2a45e4b
3 changed files with 81 additions and 70 deletions
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@ -276,7 +276,7 @@ def _fix_key_value(key: str, value: str):
 def load_yaml(review_text: str) -> dict:
    review_text = review_text.removeprefix('```yaml').rstrip('`')
    try:
-        data = yaml.load(review_text, Loader=yaml.SafeLoader)
+        data = yaml.safe_load(review_text)
    except Exception as e:
        logging.error(f"Failed to parse AI prediction: {e}")
        data = try_fix_yaml(review_text)
--- a/pr_agent/settings/pr_code_suggestions_prompts.toml
+++ b/pr_agent/settings/pr_code_suggestions_prompts.toml
@ -1,8 +1,8 @@
 [pr_code_suggestions_prompt]
-system="""You are a language model called PR-Code-Reviewer.
+system="""You are a language model called PR-Code-Reviewer that specializes in suggesting code improvements for a Pull Request (PR).
-Your task is to provide meaningful actionable code suggestions, to improve the new code presented in a PR.
+Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR.
-Example PR Diff input:
+Example for a PR Diff input:
 '
 ## src/file1.py
@ -10,8 +10,8 @@ Example PR Diff input:
 __new hunk__
 12  code line that already existed in the file...
 13  code line that already existed in the file....
-14 +new code line added in the PR
+14 +new code line1 added in the PR
-15  code line that already existed in the file...
+15 +new code line2 added in the PR
 16  code line that already existed in the file...
 __old hunk__
 code line that already existed in the file...
@ -31,13 +31,17 @@ __old hunk__
 '
 Specific instructions:
 - Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices.
 - Suggestions should refer only to code from the '__new hunk__' sections, and focus on new lines of code (lines starting with '+').
 - Provide the exact line number range (inclusive) for each issue.
 - Assume there is additional relevant code, that is not included in the diff.
 - Provide up to {{ num_code_suggestions }} code suggestions.
- Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the '__new hunk__' code.
+- Prioritize suggestions that address major problems, issues and bugs in the code.
- Don't suggest to add docstring or type hints.
+  As a second priority, suggestions should focus on best practices, code readability, maintainability, enhancements, performance, and other aspects.
  Don't suggest to add docstring or type hints.
  Try to provide diverse and insightful suggestions.
 - Suggestions should refer only to code from the '__new hunk__' sections, and focus on new lines of code (lines starting with '+').
  Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the '__new hunk__' code.
  For each suggestion, make sure to take into consideration also the context, meaning the lines before and after the relevant code.
 - Provide the exact line numbers range (inclusive) for each issue.
 - Assume there is additional relevant code, that is not included in the diff.
 {%- if extra_instructions %}
@ -45,63 +49,76 @@ Extra instructions from the user:
 {{ extra_instructions }}
 {%- endif %}
-You must use the following JSON schema to format your answer:
+You must use the following YAML schema to format your answer:
-```json
+```yaml
-{
+Code suggestions:
-    "Code suggestions": {
+  type: array
-      "type": "array",
+  minItems: 1
-      "minItems": 1,
+  maxItems: {{ num_code_suggestions }}
-      "maxItems": {{ num_code_suggestions }},
+  uniqueItems: true
-      "uniqueItems": "true",
+  items:
-      "items": {
+    relevant file:
-        "relevant file": {
+      type: string
-          "type": "string",
+      description: the relevant file full path
-          "description": "the relevant file full path"
+    suggestion content:
-        },
+      type: string
-        "suggestion content": {
+      description: |-
-          "type": "string",
+        a concrete suggestion for meaningfully improving the new PR code.
-          "description": "a concrete suggestion for meaningfully improving the new PR code (lines from the '__new hunk__' sections, starting with '+')."
+    existing code:
-        },
+      type: string
-        "existing code": {
+      description: |-
-          "type": "string",
+        a code snippet showing the relevant code lines from a '__new hunk__' section.
-          "description": "a code snippet showing the relevant code lines from a '__new hunk__' section. It must be continuous, correctly formatted and indented, and without line numbers."
+        It must be continuous, correctly formatted and indented, and without line numbers.
-        },
+    relevant lines:
-        "relevant lines": {
+      type: string
-          "type": "string",
+      description: |-
-          "description": "the relevant lines from a '__new hunk__' section, in the format of 'start_line-end_line'. For example: '10-15'. They should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above."
+        the relevant lines from a '__new hunk__' section, in the format of 'start_line-end_line'.
-        },
+        For example: '10-15'. They should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above.
-        "improved code": {
+    improved code:
-          "type": "string",
+      type: string
-          "description": "a new code snippet that can be used to replace the relevant lines in '__new hunk__' code. Replacement suggestions should be complete, correctly formatted and indented, and without line numbers."
+      description: |-
-        }
+        a new code snippet that can be used to replace the relevant lines in '__new hunk__' code.
-      }
+        Replacement suggestions should be complete, correctly formatted and indented, and without line numbers.
    }
 }
 ```
-Don't output line numbers in the 'improved code' snippets.
+Example output:
 ```yaml
 Code suggestions:
  - relevant file: |-
        src/file1.py
    suggestion content: |-
        Add a docstring to func1()
    existing code: |-
        def func1():
    relevant lines: '12-12'
    improved code: |-
        ...
 ```
 Each YAML output MUST be after a newline, indented, with block scalar indicator ('|-').
 Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields.
 """
 user="""PR Info:
 Title: '{{title}}'
 Branch: '{{branch}}'
 Description: '{{description}}'
 {%- if language %}
 Main language: {{language}}
 {%- endif %}
 {%- if commit_messages_str %}
-Commit messages:
+Title: '{{title}}'
-{{commit_messages_str}}
+
 Branch: '{{branch}}'
 Description: '{{description}}'
 {%- if language %}
 Main language: {{language}}
 {%- endif %}
 The PR Diff:
 ```
-{{diff}}
+{{- diff|trim }}
 ```
-Response (should be a valid JSON, and nothing else):
+Response (should be a valid YAML, and nothing else):
-```json
+```yaml
 """
--- a/pr_agent/tools/pr_code_suggestions.py
+++ b/pr_agent/tools/pr_code_suggestions.py
@ -1,16 +1,13 @@
 import copy
 import json
 import logging
 import textwrap
-from typing import List
+from typing import List, Dict
 import yaml
 from jinja2 import Environment, StrictUndefined
 from pr_agent.algo.ai_handler import AiHandler
 from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models, get_pr_multi_diffs
 from pr_agent.algo.token_handler import TokenHandler
-from pr_agent.algo.utils import try_fix_json
+from pr_agent.algo.utils import load_yaml
 from pr_agent.config_loader import get_settings
 from pr_agent.git_providers import BitbucketProvider, get_git_provider
 from pr_agent.git_providers.git_provider import get_main_pr_language
@ -98,14 +95,11 @@ class PRCodeSuggestions:
        return response
-    def _prepare_pr_code_suggestions(self) -> str:
+    def _prepare_pr_code_suggestions(self) -> Dict:
        review = self.prediction.strip()
-        try:
+        data = load_yaml(review)
-            data = json.loads(review)
+        if isinstance(data, list):
-        except json.decoder.JSONDecodeError:
+            data = {'Code suggestions': data}
            if get_settings().config.verbosity_level >= 2:
                logging.info(f"Could not parse json response: {review}")
            data = try_fix_json(review, code_suggestions=True)
        return data
    def push_inline_code_suggestions(self, data):
@ -227,7 +221,7 @@ class PRCodeSuggestions:
            response, finish_reason = await self.ai_handler.chat_completion(model=model, system=system_prompt,
                                                                            user=user_prompt)
-            sort_order = yaml.safe_load(response)
+            sort_order = load_yaml(response)
            for s in sort_order['Sort Order']:
                suggestion_number = s['suggestion number']
                importance_order = s['importance order']