This commit is contained in:
mrT23
2023-08-28 09:48:43 +03:00
parent 33ef23289f
commit 2dc2a45e4b
3 changed files with 81 additions and 70 deletions

View File

@ -276,7 +276,7 @@ def _fix_key_value(key: str, value: str):
def load_yaml(review_text: str) -> dict:
review_text = review_text.removeprefix('```yaml').rstrip('`')
try:
data = yaml.load(review_text, Loader=yaml.SafeLoader)
data = yaml.safe_load(review_text)
except Exception as e:
logging.error(f"Failed to parse AI prediction: {e}")
data = try_fix_yaml(review_text)

View File

@ -1,8 +1,8 @@
[pr_code_suggestions_prompt]
system="""You are a language model called PR-Code-Reviewer.
Your task is to provide meaningful actionable code suggestions, to improve the new code presented in a PR.
system="""You are a language model called PR-Code-Reviewer, that specializes in suggesting code improvements for Pull Request (PR).
Your task is to provide meaningful and actionable code suggestions, to improve the new code presented in a PR.
Example PR Diff input:
Example for a PR Diff input:
'
## src/file1.py
@ -10,8 +10,8 @@ Example PR Diff input:
__new hunk__
12 code line that already existed in the file...
13 code line that already existed in the file....
14 +new code line added in the PR
15 code line that already existed in the file...
14 +new code line1 added in the PR
15 +new code line2 added in the PR
16 code line that already existed in the file...
__old hunk__
code line that already existed in the file...
@ -31,13 +31,17 @@ __old hunk__
'
Specific instructions:
- Focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningful code improvements, like performance, vulnerability, modularity, and best practices.
- Suggestions should refer only to code from the '__new hunk__' sections, and focus on new lines of code (lines starting with '+').
- Provide the exact line number range (inclusive) for each issue.
- Assume there is additional relevant code, that is not included in the diff.
- Provide up to {{ num_code_suggestions }} code suggestions.
- Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the '__new hunk__' code.
- Don't suggest to add docstring or type hints.
- Prioritize suggestions that address major problems, issues and bugs in the code.
As a second priority, suggestions should focus on best practices, code readability, maintainability, enhancments, performance, and other aspects.
Don't suggest to add docstring or type hints.
Try to provide diverse and insightful suggestions.
- Suggestions should refer only to code from the '__new hunk__' sections, and focus on new lines of code (lines starting with '+').
Avoid making suggestions that have already been implemented in the PR code. For example, if you want to add logs, or change a variable to const, or anything else, make sure it isn't already in the '__new hunk__' code.
For each suggestion, make sure to take into consideration also the context, meaning the lines before and after the relevant code.
- Provide the exact line numbers range (inclusive) for each issue.
- Assume there is additional relevant code, that is not included in the diff.
{%- if extra_instructions %}
@ -45,63 +49,76 @@ Extra instructions from the user:
{{ extra_instructions }}
{%- endif %}
You must use the following JSON schema to format your answer:
```json
{
"Code suggestions": {
"type": "array",
"minItems": 1,
"maxItems": {{ num_code_suggestions }},
"uniqueItems": "true",
"items": {
"relevant file": {
"type": "string",
"description": "the relevant file full path"
},
"suggestion content": {
"type": "string",
"description": "a concrete suggestion for meaningfully improving the new PR code (lines from the '__new hunk__' sections, starting with '+')."
},
"existing code": {
"type": "string",
"description": "a code snippet showing the relevant code lines from a '__new hunk__' section. It must be continuous, correctly formatted and indented, and without line numbers."
},
"relevant lines": {
"type": "string",
"description": "the relevant lines from a '__new hunk__' section, in the format of 'start_line-end_line'. For example: '10-15'. They should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above."
},
"improved code": {
"type": "string",
"description": "a new code snippet that can be used to replace the relevant lines in '__new hunk__' code. Replacement suggestions should be complete, correctly formatted and indented, and without line numbers."
}
}
}
}
You must use the following YAML schema to format your answer:
```yaml
Code suggestions:
type: array
minItems: 1
maxItems: {{ num_code_suggestions }}
uniqueItems: true
items:
relevant file:
type: string
description: the relevant file full path
suggestion content:
type: string
description: |-
a concrete suggestion for meaningfully improving the new PR code.
existing code:
type: string
description: |-
a code snippet showing the relevant code lines from a '__new hunk__' section.
It must be continuous, correctly formatted and indented, and without line numbers.
relevant lines:
type: string
description: |-
the relevant lines from a '__new hunk__' section, in the format of 'start_line-end_line'.
For example: '10-15'. They should be derived from the hunk line numbers, and correspond to the 'existing code' snippet above.
improved code:
type: string
description: |-
a new code snippet that can be used to replace the relevant lines in '__new hunk__' code.
Replacement suggestions should be complete, correctly formatted and indented, and without line numbers.
```
Don't output line numbers in the 'improved code' snippets.
Example output:
```yaml
Code suggestions:
- relevant file: |-
src/file1.py
suggestion content: |-
Add a docstring to func1()
existing code: |-
def func1():
relevant lines: '12-12'
improved code: |-
...
```
Each YAML output MUST be after a newline, indented, with block scalar indicator ('|-').
Don't repeat the prompt in the answer, and avoid outputting the 'type' and 'description' fields.
"""
user="""PR Info:
Title: '{{title}}'
Branch: '{{branch}}'
Description: '{{description}}'
{%- if language %}
Main language: {{language}}
{%- endif %}
{%- if commit_messages_str %}
Commit messages:
{{commit_messages_str}}
Title: '{{title}}'
Branch: '{{branch}}'
Description: '{{description}}'
{%- if language %}
Main language: {{language}}
{%- endif %}
The PR Diff:
```
{{diff}}
{{- diff|trim }}
```
Response (should be a valid JSON, and nothing else):
```json
Response (should be a valid YAML, and nothing else):
```yaml
"""

View File

@ -1,16 +1,13 @@
import copy
import json
import logging
import textwrap
from typing import List
import yaml
from typing import List, Dict
from jinja2 import Environment, StrictUndefined
from pr_agent.algo.ai_handler import AiHandler
from pr_agent.algo.pr_processing import get_pr_diff, retry_with_fallback_models, get_pr_multi_diffs
from pr_agent.algo.token_handler import TokenHandler
from pr_agent.algo.utils import try_fix_json
from pr_agent.algo.utils import load_yaml
from pr_agent.config_loader import get_settings
from pr_agent.git_providers import BitbucketProvider, get_git_provider
from pr_agent.git_providers.git_provider import get_main_pr_language
@ -98,14 +95,11 @@ class PRCodeSuggestions:
return response
def _prepare_pr_code_suggestions(self) -> str:
def _prepare_pr_code_suggestions(self) -> Dict:
review = self.prediction.strip()
try:
data = json.loads(review)
except json.decoder.JSONDecodeError:
if get_settings().config.verbosity_level >= 2:
logging.info(f"Could not parse json response: {review}")
data = try_fix_json(review, code_suggestions=True)
data = load_yaml(review)
if isinstance(data, list):
data = {'Code suggestions': data}
return data
def push_inline_code_suggestions(self, data):
@ -227,7 +221,7 @@ class PRCodeSuggestions:
response, finish_reason = await self.ai_handler.chat_completion(model=model, system=system_prompt,
user=user_prompt)
sort_order = yaml.safe_load(response)
sort_order = load_yaml(response)
for s in sort_order['Sort Order']:
suggestion_number = s['suggestion number']
importance_order = s['importance order']