yaml

2025-07-21 04:50:39 +08:00 · 2023-08-28 09:48:43 +03:00
parent 33ef23289f
commit 2dc2a45e4b
3 changed files with 81 additions and 70 deletions
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@ -276,7 +276,7 @@ def _fix_key_value(key: str, value: str):
 def load_yaml(review_text: str) -> dict:
    review_text = review_text.removeprefix('```yaml').rstrip('`')
    try:
-        data = yaml.load(review_text, Loader=yaml.SafeLoader)
+        data = yaml.safe_load(review_text)
    except Exception as e:
        logging.error(f"Failed to parse AI prediction: {e}")
        data = try_fix_yaml(review_text)
--- a/pr_agent/settings/pr_code_suggestions_prompts.toml
+++ b/pr_agent/settings/pr_code_suggestions_prompts.toml
@ -1,8 +1,8 @@
 [pr_code_suggestions_prompt]
-system="""You are a language model called PR-Code-Reviewer.
-Your task is to provide meaningful actionable code suggestions, to improve the new code presented in a PR.
+system="""You are a language model called PR-Code-Reviewer, that specializes in suggesting code improvements for a Pull Request (PR).
+Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR.

-Example PR Diff input:
+Example for a PR Diff input:
 '
 ## src/file1.py

@ -10,8 +10,8 @@ Example PR Diff input:
 __new hunk__
 12  code line that already existed in the file...
 13  code line that already existed in the file....
-14 +new code line added in the PR
-15  code line that already existed in the file...
+14 +new code line1 added in the PR
+15 +new code line2 added in the PR
 16  code line that already existed in the file...
 __old hunk__
 code line that already existed in the file...
@ -31,13 +31,17 @@ __old hunk__
 '

 Specific instructions:
- Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices.
- Suggestions should refer only to code from the '__new hunk__' sections, and focus on new lines of code (lines starting with '+').
- Provide the exact line number range (inclusive) for each issue.
- Assume there is additional relevant code, that is not included in the diff.
 - Provide up to {{ num_code_suggestions }} code suggestions.
- Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the '__new hunk__' code.
- Don't suggest to add docstring or type hints.
+- Prioritize suggestions that address major problems, issues and bugs in the code.
+  As a second priority, suggestions should focus on best practices, code readability, maintainability, enhancements, performance, and other aspects.
+  Don't suggest to add docstring or type hints.
+  Try to provide diverse and insightful suggestions.
+- Suggestions should refer only to code from the '__new hunk__' sections, and focus on new lines of code (lines starting with '+').
+  Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the '__new hunk__' code.
+  For each suggestion, make sure to take into consideration also the context, meaning the lines before and after the relevant code.
+- Provide the exact line number range (inclusive) for each issue.
+- Assume there is additional relevant code, that is not included in the diff.
+

 {%- if extra_instructions %}

@ -45,63 +49,76 @@ Extra instructions from the user:
 {{ extra_instructions }}
 {%- endif %}

-You must use the following JSON schema to format your answer:
-```json
-{
-    "Code suggestions": {
-      "type": "array",
-      "minItems": 1,
-      "maxItems": {{ num_code_suggestions }},
-      "uniqueItems": "true",
-      "items": {
-        "relevant file": {
-          "type": "string",
-          "description": "the relevant file full path"
-        },
-        "suggestion content": {
-          "type": "string",
-          "description": "a concrete suggestion for meaningfully improving the new PR code (lines from the '__new hunk__' sections, starting with '+')."
-        },
-        "existing code": {
-          "type": "string",
-          "description": "a code snippet showing the relevant code lines from a '__new hunk__' section. It must be continuous, correctly formatted and indented, and without line numbers."
-        },
-        "relevant lines": {
-          "type": "string",
-          "description": "the relevant lines from a '__new hunk__' section, in the format of 'start_line-end_line'. For example: '10-15'. They should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above."
-        },
-        "improved code": {
-          "type": "string",
-          "description": "a new code snippet that can be used to replace the relevant lines in '__new hunk__' code. Replacement suggestions should be complete, correctly formatted and indented, and without line numbers."
-        }
-      }
-    }
-}
+You must use the following YAML schema to format your answer:
+```yaml
+Code suggestions:
+  type: array
+  minItems: 1
+  maxItems: {{ num_code_suggestions }}
+  uniqueItems: true
+  items:
+    relevant file:
+      type: string
+      description: the relevant file full path
+    suggestion content:
+      type: string
+      description: |-
+        a concrete suggestion for meaningfully improving the new PR code.
+    existing code:
+      type: string
+      description: |-
+        a code snippet showing the relevant code lines from a '__new hunk__' section.
+        It must be continuous, correctly formatted and indented, and without line numbers.
+    relevant lines:
+      type: string
+      description: |-
+        the relevant lines from a '__new hunk__' section, in the format of 'start_line-end_line'.
+        For example: '10-15'. They should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above.
+    improved code:
+      type: string
+      description: |-
+        a new code snippet that can be used to replace the relevant lines in '__new hunk__' code.
+        Replacement suggestions should be complete, correctly formatted and indented, and without line numbers.
 ```

-Don't output line numbers in the 'improved code' snippets.
+Example output:
+```yaml
+Code suggestions:
+  - relevant file: |-
+        src/file1.py
+    suggestion content: |-
+        Add a docstring to func1()
+    existing code: |-
+        def func1():
+    relevant lines: '12-12'
+    improved code: |-
+        ...
+```
+
+
+Each YAML output MUST be after a newline, indented, with a block scalar indicator ('|-').
 Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields.
 """

 user="""PR Info:
-Title: '{{title}}'
-Branch: '{{branch}}'
-Description: '{{description}}'
-{%- if language %}
-Main language: {{language}}
-{%- endif %}
-{%- if commit_messages_str %}

-Commit messages:
-{{commit_messages_str}}
+Title: '{{title}}'
+
+Branch: '{{branch}}'
+
+Description: '{{description}}'
+
+{%- if language %}
+
+Main language: {{language}}
 {%- endif %}


 The PR Diff:
 ```
-{{diff}}
+{{- diff|trim }}
 ```

-Response (should be a valid JSON, and nothing else):
-```json
+Response (should be valid YAML, and nothing else):
+```yaml
 """
--- a/pr_agent/tools/pr_code_suggestions.py
+++ b/pr_agent/tools/pr_code_suggestions.py
@ -1,16 +1,13 @@
 import copy
-import json
 import logging
 import textwrap
-from typing import List
-
-import yaml
+from typing import List, Dict
 from jinja2 import Environment, StrictUndefined

 from pr_agent.algo.ai_handler import AiHandler
 from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models, get_pr_multi_diffs
 from pr_agent.algo.token_handler import TokenHandler
-from pr_agent.algo.utils import try_fix_json
+from pr_agent.algo.utils import load_yaml
 from pr_agent.config_loader import get_settings
 from pr_agent.git_providers import BitbucketProvider, get_git_provider
 from pr_agent.git_providers.git_provider import get_main_pr_language
@ -98,14 +95,11 @@ class PRCodeSuggestions:

        return response

-    def _prepare_pr_code_suggestions(self) -> str:
+    def _prepare_pr_code_suggestions(self) -> Dict:
        review = self.prediction.strip()
-        try:
-            data = json.loads(review)
-        except json.decoder.JSONDecodeError:
-            if get_settings().config.verbosity_level >= 2:
-                logging.info(f"Could not parse json response: {review}")
-            data = try_fix_json(review, code_suggestions=True)
+        data = load_yaml(review)
+        if isinstance(data, list):
+            data = {'Code suggestions': data}
        return data

    def push_inline_code_suggestions(self, data):
@ -227,7 +221,7 @@ class PRCodeSuggestions:
            response, finish_reason = await self.ai_handler.chat_completion(model=model, system=system_prompt,
                                                                            user=user_prompt)

-            sort_order = yaml.safe_load(response)
+            sort_order = load_yaml(response)
            for s in sort_order['Sort Order']:
                suggestion_number = s['suggestion number']
                importance_order = s['importance order']