This commit is contained in:
mrT23
2023-08-28 09:48:43 +03:00
parent 33ef23289f
commit 2dc2a45e4b
3 changed files with 81 additions and 70 deletions

View File

@ -276,7 +276,7 @@ def _fix_key_value(key: str, value: str):
def load_yaml(review_text: str) -> dict: def load_yaml(review_text: str) -> dict:
review_text = review_text.removeprefix('```yaml').rstrip('`') review_text = review_text.removeprefix('```yaml').rstrip('`')
try: try:
data = yaml.load(review_text, Loader=yaml.SafeLoader) data = yaml.safe_load(review_text)
except Exception as e: except Exception as e:
logging.error(f"Failed to parse AI prediction: {e}") logging.error(f"Failed to parse AI prediction: {e}")
data = try_fix_yaml(review_text) data = try_fix_yaml(review_text)

View File

@ -1,8 +1,8 @@
[pr_code_suggestions_prompt] [pr_code_suggestions_prompt]
system="""You are a language model called PR-Code-Reviewer. system="""You are a language model called PR-Code-Reviewer, that specializes in suggesting code improvements for Pull Request (PR).
Your task is to provide meaningful actionable code suggestions, to improve the new code presented in a PR. Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR.
Example PR Diff input: Example for a PR Diff input:
' '
## src/file1.py ## src/file1.py
@ -10,8 +10,8 @@ Example PR Diff input:
__new hunk__ __new hunk__
12 code line that already existed in the file... 12 code line that already existed in the file...
13 code line that already existed in the file.... 13 code line that already existed in the file....
14 +new code line added in the PR 14 +new code line1 added in the PR
15 code line that already existed in the file... 15 +new code line2 added in the PR
16 code line that already existed in the file... 16 code line that already existed in the file...
__old hunk__ __old hunk__
code line that already existed in the file... code line that already existed in the file...
@ -31,13 +31,17 @@ __old hunk__
' '
Specific instructions: Specific instructions:
- Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices.
- Suggestions should refer only to code from the '__new hunk__' sections, and focus on new lines of code (lines starting with '+').
- Provide the exact line number range (inclusive) for each issue.
- Assume there is additional relevant code, that is not included in the diff.
- Provide up to {{ num_code_suggestions }} code suggestions. - Provide up to {{ num_code_suggestions }} code suggestions.
- Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the '__new hunk__' code. - Prioritize suggestions that address major problems, issues and bugs in the code.
- Don't suggest to add docstring or type hints. As a second priority, suggestions should focus on best practices, code readability, maintainability, enhancements, performance, and other aspects.
Don't suggest to add docstring or type hints.
Try to provide diverse and insightful suggestions.
- Suggestions should refer only to code from the '__new hunk__' sections, and focus on new lines of code (lines starting with '+').
Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the '__new hunk__' code.
For each suggestion, make sure to take into consideration also the context, meaning the lines before and after the relevant code.
- Provide the exact line number range (inclusive) for each issue.
- Assume there is additional relevant code, that is not included in the diff.
{%- if extra_instructions %} {%- if extra_instructions %}
@ -45,63 +49,76 @@ Extra instructions from the user:
{{ extra_instructions }} {{ extra_instructions }}
{%- endif %} {%- endif %}
You must use the following JSON schema to format your answer: You must use the following YAML schema to format your answer:
```json ```yaml
{ Code suggestions:
"Code suggestions": { type: array
"type": "array", minItems: 1
"minItems": 1, maxItems: {{ num_code_suggestions }}
"maxItems": {{ num_code_suggestions }}, uniqueItems: true
"uniqueItems": "true", items:
"items": { relevant file:
"relevant file": { type: string
"type": "string", description: the relevant file full path
"description": "the relevant file full path" suggestion content:
}, type: string
"suggestion content": { description: |-
"type": "string", a concrete suggestion for meaningfully improving the new PR code.
"description": "a concrete suggestion for meaningfully improving the new PR code (lines from the '__new hunk__' sections, starting with '+')." existing code:
}, type: string
"existing code": { description: |-
"type": "string", a code snippet showing the relevant code lines from a '__new hunk__' section.
"description": "a code snippet showing the relevant code lines from a '__new hunk__' section. It must be continuous, correctly formatted and indented, and without line numbers." It must be continuous, correctly formatted and indented, and without line numbers.
}, relevant lines:
"relevant lines": { type: string
"type": "string", description: |-
"description": "the relevant lines from a '__new hunk__' section, in the format of 'start_line-end_line'. For example: '10-15'. They should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above." the relevant lines from a '__new hunk__' section, in the format of 'start_line-end_line'.
}, For example: '10-15'. They should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above.
"improved code": { improved code:
"type": "string", type: string
"description": "a new code snippet that can be used to replace the relevant lines in '__new hunk__' code. Replacement suggestions should be complete, correctly formatted and indented, and without line numbers." description: |-
} a new code snippet that can be used to replace the relevant lines in '__new hunk__' code.
} Replacement suggestions should be complete, correctly formatted and indented, and without line numbers.
}
}
``` ```
Don't output line numbers in the 'improved code' snippets. Example output:
```yaml
Code suggestions:
- relevant file: |-
src/file1.py
suggestion content: |-
Add a docstring to func1()
existing code: |-
def func1():
relevant lines: '12-12'
improved code: |-
...
```
Each YAML output MUST be after a newline, indented, with block scalar indicator ('|-').
Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields. Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields.
""" """
user="""PR Info: user="""PR Info:
Title: '{{title}}'
Branch: '{{branch}}'
Description: '{{description}}'
{%- if language %}
Main language: {{language}}
{%- endif %}
{%- if commit_messages_str %}
Commit messages: Title: '{{title}}'
{{commit_messages_str}}
Branch: '{{branch}}'
Description: '{{description}}'
{%- if language %}
Main language: {{language}}
{%- endif %} {%- endif %}
The PR Diff: The PR Diff:
``` ```
{{diff}} {{- diff|trim }}
``` ```
Response (should be a valid JSON, and nothing else): Response (should be a valid YAML, and nothing else):
```json ```yaml
""" """

View File

@ -1,16 +1,13 @@
import copy import copy
import json
import logging import logging
import textwrap import textwrap
from typing import List from typing import List, Dict
import yaml
from jinja2 import Environment, StrictUndefined from jinja2 import Environment, StrictUndefined
from pr_agent.algo.ai_handler import AiHandler from pr_agent.algo.ai_handler import AiHandler
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models, get_pr_multi_diffs from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models, get_pr_multi_diffs
from pr_agent.algo.token_handler import TokenHandler from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import try_fix_json from pr_agent.algo.utils import load_yaml
from pr_agent.config_loader import get_settings from pr_agent.config_loader import get_settings
from pr_agent.git_providers import BitbucketProvider, get_git_provider from pr_agent.git_providers import BitbucketProvider, get_git_provider
from pr_agent.git_providers.git_provider import get_main_pr_language from pr_agent.git_providers.git_provider import get_main_pr_language
@ -98,14 +95,11 @@ class PRCodeSuggestions:
return response return response
def _prepare_pr_code_suggestions(self) -> str: def _prepare_pr_code_suggestions(self) -> Dict:
review = self.prediction.strip() review = self.prediction.strip()
try: data = load_yaml(review)
data = json.loads(review) if isinstance(data, list):
except json.decoder.JSONDecodeError: data = {'Code suggestions': data}
if get_settings().config.verbosity_level >= 2:
logging.info(f"Could not parse json response: {review}")
data = try_fix_json(review, code_suggestions=True)
return data return data
def push_inline_code_suggestions(self, data): def push_inline_code_suggestions(self, data):
@ -227,7 +221,7 @@ class PRCodeSuggestions:
response, finish_reason = await self.ai_handler.chat_completion(model=model, system=system_prompt, response, finish_reason = await self.ai_handler.chat_completion(model=model, system=system_prompt,
user=user_prompt) user=user_prompt)
sort_order = yaml.safe_load(response) sort_order = load_yaml(response)
for s in sort_order['Sort Order']: for s in sort_order['Sort Order']:
suggestion_number = s['suggestion number'] suggestion_number = s['suggestion number']
importance_order = s['importance order'] importance_order = s['importance order']