Merge pull request #43 from Codium-ai/tr/inline_code_suggestions

Tr/inline code suggestions
This commit is contained in:
mrT23
2023-07-13 10:48:42 +03:00
committed by GitHub
10 changed files with 56 additions and 108 deletions

View File

@ -250,45 +250,6 @@ require_tests_review=true
require_security_review=true require_security_review=true
``` ```
#### Code Suggestions configuration:
There are also configuration options to control different aspects of the `code suggestions` feature.
The number of suggestions provided can be controlled by adjusting the following parameter:
```
num_code_suggestions=4
```
You can also enable more verbose and informative mode of code suggestions:
```
extended_code_suggestions=false
```
This is a comparison of the regular and extended code suggestions modes:
- **relevant file:** sql.py
- **suggestion content:** Remove hardcoded sensitive information like username and password. Use environment variables or a secure method to store these values. [important]
Example for extended suggestion:
- **relevant file:** sql.py
- **suggestion content:** Remove hardcoded sensitive information (username and password) [important]
- **why:** Hardcoding sensitive information is a security risk. It's better to use environment variables or a secure way to store these values.
- **code example:**
- **before code:**
```
user = "root",
password = "Mysql@123",
```
- **after code:**
```
user = os.getenv('DB_USER'),
password = os.getenv('DB_PASSWORD'),
```
---
## How it works ## How it works
![PR-Agent Tools](./pics/pr_agent_overview.png) ![PR-Agent Tools](./pics/pr_agent_overview.png)

View File

@ -24,10 +24,10 @@ def get_pr_diff(git_provider: Union[GithubProvider, Any], token_handler: TokenHa
Returns a string with the diff of the PR. Returns a string with the diff of the PR.
If needed, apply diff minimization techniques to reduce the number of tokens If needed, apply diff minimization techniques to reduce the number of tokens
""" """
files = list(git_provider.get_diff_files()) git_provider.pr.files = list(git_provider.get_diff_files())
# get pr languages # get pr languages
pr_languages = sort_files_by_main_languages(git_provider.get_languages(), files) pr_languages = sort_files_by_main_languages(git_provider.get_languages(), git_provider.pr.files)
# generate a standard diff string, with patch extension # generate a standard diff string, with patch extension
patches_extended, total_tokens = pr_generate_extended_diff(pr_languages, token_handler) patches_extended, total_tokens = pr_generate_extended_diff(pr_languages, token_handler)

View File

@ -53,10 +53,7 @@ def parse_code_suggestion(code_suggestions: dict) -> str:
code_str_indented = textwrap.indent(code_str, ' ') code_str_indented = textwrap.indent(code_str, ' ')
markdown_text += f" - **{code_key}:**\n{code_str_indented}\n" markdown_text += f" - **{code_key}:**\n{code_str_indented}\n"
else: else:
if "suggestion number" in sub_key.lower(): if "relevant file" in sub_key.lower():
# markdown_text += f"- **suggestion {sub_value}:**\n" # prettier formatting
pass
elif "relevant file" in sub_key.lower():
markdown_text += f"\n - **{sub_key}:** {sub_value}\n" markdown_text += f"\n - **{sub_key}:** {sub_value}\n"
else: else:
markdown_text += f" **{sub_key}:** {sub_value}\n" markdown_text += f" **{sub_key}:** {sub_value}\n"

View File

@ -26,6 +26,8 @@ class GithubProvider:
self.pr = self._get_pr() self.pr = self._get_pr()
def get_files(self): def get_files(self):
if hasattr(self.pr, 'files'):
return self.pr.files
return self.pr.get_files() return self.pr.get_files()
def get_diff_files(self) -> list[FilePatchInfo]: def get_diff_files(self) -> list[FilePatchInfo]:

View File

@ -2,14 +2,14 @@
model="gpt-4-0613" model="gpt-4-0613"
git_provider="github" git_provider="github"
publish_review=true publish_review=true
verbosity_level=0 # 0,1,2 verbosity_level=0 # 0,1,2
[pr_reviewer] [pr_reviewer]
require_focused_review=true require_focused_review=true
require_tests_review=true require_tests_review=true
require_security_review=true require_security_review=true
extended_code_suggestions=false
num_code_suggestions=4 num_code_suggestions=4
inline_code_comments = true
[pr_questions] [pr_questions]

View File

@ -3,9 +3,6 @@ system="""You are CodiumAI-PR-Reviewer, a language model designed to review git
Your task is to provide constructive and concise feedback for the PR, and also provide meaningfull code suggestions to improve the new PR code (the '+' lines). Your task is to provide constructive and concise feedback for the PR, and also provide meaningfull code suggestions to improve the new PR code (the '+' lines).
- Provide up to {{ num_code_suggestions }} code suggestions. - Provide up to {{ num_code_suggestions }} code suggestions.
- Try to focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningfull code improvements, like performance, vulnerability, modularity, and best practices. - Try to focus on important suggestions like fixing code problems, issues and bugs. As a second priority, provide suggestions for meaningfull code improvements, like performance, vulnerability, modularity, and best practices.
{%- if extended_code_suggestions %}
- For each suggestion, provide a short and concise code snippet to illustrate the existing code, and the improved code.
{%- endif %}
- Make sure not to provide suggestion repeating modifications already implemented in the new PR code (the '+' lines). - Make sure not to provide suggestion repeating modifications already implemented in the new PR code (the '+' lines).
You must use the following JSON schema to format your answer: You must use the following JSON schema to format your answer:
@ -47,41 +44,18 @@ You must use the following JSON schema to format your answer:
"maxItems": {{ num_code_suggestions }}, "maxItems": {{ num_code_suggestions }},
"uniqueItems": true, "uniqueItems": true,
"items": { "items": {
"suggestion number": {
"type": "int",
"description": "suggestion number, starting from 1"
},
"relevant file": { "relevant file": {
"type": "string", "type": "string",
"description": "the relevant file name" "description": "the relevant file full path"
}, },
"suggestion content": { "suggestion content": {
"type": "string", "type": "string",
{%- if extended_code_suggestions %}
"description": "a concrete suggestion for meaningfully improving the new PR code. Don't repeat previous suggestions. Add tags with importance measure that matches each suggestion ('important' or 'medium'). Do not make suggestions for updating or adding docstrings, renaming PR title and description, or linter like.
{%- else %}
"description": "a concrete suggestion for meaningfully improving the new PR code. Also describe how, specifically, the suggestion can be applied to new PR code. Add tags with importance measure that matches each suggestion ('important' or 'medium'). Do not make suggestions for updating or adding docstrings, renaming PR title and description, or linter like. "description": "a concrete suggestion for meaningfully improving the new PR code. Also describe how, specifically, the suggestion can be applied to new PR code. Add tags with importance measure that matches each suggestion ('important' or 'medium'). Do not make suggestions for updating or adding docstrings, renaming PR title and description, or linter like.
{%- endif %}
}, },
{%- if extended_code_suggestions %} "relevant line in file": {
"why": {
"type": "string", "type": "string",
"description": "shortly explain why this suggestion is important" "description": "an authentic single code line from the PR git diff section, to which the suggestion applies."
},
"code example": {
"type": "object",
"properties": {
"before code": {
"type": "string",
"description": "Short and concise code snippet, to illustrate the existing code"
},
"after code": {
"type": "string",
"description": "Short and concise code snippet, to illustrate the improved code"
}
}
} }
{%- endif %}
} }
}, },
{%- if require_security %} {%- if require_security %}
@ -115,17 +89,9 @@ Example output:
"General PR suggestions": "..., `xxx`...", "General PR suggestions": "..., `xxx`...",
"Code suggestions": [ "Code suggestions": [
{ {
"suggestion number": 1, "relevant file": "directory/xxx.py",
"relevant file": "xxx.py",
"suggestion content": "xxx [important]", "suggestion content": "xxx [important]",
{%- if extended_code_suggestions %} "relevant line in file": "xxx",
"why": "xxx",
"code example":
{
"before code": "xxx",
"after code": "xxx"
}
{%- endif %}
}, },
... ...
] ]

View File

@ -33,7 +33,6 @@ class PRReviewer:
"require_tests": settings.pr_reviewer.require_tests_review, "require_tests": settings.pr_reviewer.require_tests_review,
"require_security": settings.pr_reviewer.require_security_review, "require_security": settings.pr_reviewer.require_security_review,
"require_focused": settings.pr_reviewer.require_focused_review, "require_focused": settings.pr_reviewer.require_focused_review,
'extended_code_suggestions': settings.pr_reviewer.extended_code_suggestions,
'num_code_suggestions': settings.pr_reviewer.num_code_suggestions, 'num_code_suggestions': settings.pr_reviewer.num_code_suggestions,
} }
self.token_handler = TokenHandler(self.git_provider.pr, self.token_handler = TokenHandler(self.git_provider.pr,
@ -55,6 +54,9 @@ class PRReviewer:
logging.info('Pushing PR review...') logging.info('Pushing PR review...')
self.git_provider.publish_comment(pr_comment) self.git_provider.publish_comment(pr_comment)
self.git_provider.remove_initial_comment() self.git_provider.remove_initial_comment()
if settings.pr_reviewer.inline_code_comments:
logging.info('Pushing inline code comments...')
self._publish_inline_code_comments()
return "" return ""
async def _get_prediction(self): async def _get_prediction(self):
@ -86,6 +88,9 @@ class PRReviewer:
del data['PR Feedback']['Security concerns'] del data['PR Feedback']['Security concerns']
data['PR Analysis']['Security concerns'] = val data['PR Analysis']['Security concerns'] = val
if settings.config.git_provider == 'github' and settings.pr_reviewer.inline_code_comments:
del data['PR Feedback']['Code suggestions']
markdown_text = convert_to_markdown(data) markdown_text = convert_to_markdown(data)
user = self.git_provider.get_user_id() user = self.git_provider.get_user_id()
@ -103,4 +108,37 @@ class PRReviewer:
if settings.config.verbosity_level >= 2: if settings.config.verbosity_level >= 2:
logging.info(f"Markdown response:\n{markdown_text}") logging.info(f"Markdown response:\n{markdown_text}")
return markdown_text return markdown_text
def _publish_inline_code_comments(self):
if settings.config.git_provider != 'github': # inline comments are currently only supported for github
return
review = self.prediction.strip()
try:
data = json.loads(review)
except json.decoder.JSONDecodeError:
data = try_fix_json(review)
pr = self.git_provider.pr
last_commit_id = list(pr.get_commits())[-1]
files = list(self.git_provider.get_diff_files())
for d in data['PR Feedback']['Code suggestions']:
relevant_file = d['relevant file'].strip()
relevant_line_in_file = d['relevant line in file'].strip()
content = d['suggestion content']
position = -1
for file in files:
if file.filename.strip() == relevant_file:
patch = file.patch
patch_lines = patch.splitlines()
for i, line in enumerate(patch_lines):
if relevant_line_in_file in line:
position = i
if position == -1:
logging.info(f"Could not find position for {relevant_file} {relevant_line_in_file}")
else:
body = content
path = relevant_file.strip()
pr.create_review_comment(body=body, commit_id=last_commit_id, path=path, position=position)

View File

@ -54,14 +54,12 @@ class TestConvertToMarkdown:
'General PR suggestions': 'general suggestion...', 'General PR suggestions': 'general suggestion...',
'Code suggestions': [ 'Code suggestions': [
{ {
'Suggestion number': 1,
'Code example': { 'Code example': {
'Before': 'Code before', 'Before': 'Code before',
'After': 'Code after' 'After': 'Code after'
} }
}, },
{ {
'Suggestion number': 2,
'Code example': { 'Code example': {
'Before': 'Code before 2', 'Before': 'Code before 2',
'After': 'Code after 2' 'After': 'Code after 2'

View File

@ -7,7 +7,7 @@ import pytest
class TestTryFixJson: class TestTryFixJson:
# Tests that JSON with complete 'Code suggestions' section returns expected output # Tests that JSON with complete 'Code suggestions' section returns expected output
def test_incomplete_code_suggestions(self): def test_incomplete_code_suggestions(self):
review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...' review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...'
expected_output = { expected_output = {
'PR Analysis': { 'PR Analysis': {
'Main theme': 'xxx', 'Main theme': 'xxx',
@ -18,7 +18,6 @@ class TestTryFixJson:
'General PR suggestions': '..., `xxx`...', 'General PR suggestions': '..., `xxx`...',
'Code suggestions': [ 'Code suggestions': [
{ {
'suggestion number': 1,
'relevant file': 'xxx.py', 'relevant file': 'xxx.py',
'suggestion content': 'xxx [important]' 'suggestion content': 'xxx [important]'
} }
@ -28,7 +27,7 @@ class TestTryFixJson:
assert try_fix_json(review) == expected_output assert try_fix_json(review) == expected_output
def test_incomplete_code_suggestions_new_line(self): def test_incomplete_code_suggestions_new_line(self):
review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n\t, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...' review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n\t, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy [incomp...'
expected_output = { expected_output = {
'PR Analysis': { 'PR Analysis': {
'Main theme': 'xxx', 'Main theme': 'xxx',
@ -39,7 +38,6 @@ class TestTryFixJson:
'General PR suggestions': '..., `xxx`...', 'General PR suggestions': '..., `xxx`...',
'Code suggestions': [ 'Code suggestions': [
{ {
'suggestion number': 1,
'relevant file': 'xxx.py', 'relevant file': 'xxx.py',
'suggestion content': 'xxx [important]' 'suggestion content': 'xxx [important]'
} }
@ -49,7 +47,7 @@ class TestTryFixJson:
assert try_fix_json(review) == expected_output assert try_fix_json(review) == expected_output
def test_incomplete_code_suggestions_many_close_brackets(self): def test_incomplete_code_suggestions_many_close_brackets(self):
review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy }, [}\n ,incomp.} ,..' review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"} \n, {"suggestion number": 2, "relevant file": "yyy.py", "suggestion content": "yyy }, [}\n ,incomp.} ,..'
expected_output = { expected_output = {
'PR Analysis': { 'PR Analysis': {
'Main theme': 'xxx', 'Main theme': 'xxx',
@ -60,7 +58,6 @@ class TestTryFixJson:
'General PR suggestions': '..., `xxx`...', 'General PR suggestions': '..., `xxx`...',
'Code suggestions': [ 'Code suggestions': [
{ {
'suggestion number': 1,
'relevant file': 'xxx.py', 'relevant file': 'xxx.py',
'suggestion content': 'xxx [important]' 'suggestion content': 'xxx [important]'
} }
@ -70,7 +67,7 @@ class TestTryFixJson:
assert try_fix_json(review) == expected_output assert try_fix_json(review) == expected_output
def test_incomplete_code_suggestions_relevant_file(self): def test_incomplete_code_suggestions_relevant_file(self):
review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"suggestion number": 1, "relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.p' review = '{"PR Analysis": {"Main theme": "xxx", "Description and title": "Yes", "Type of PR": "Bug fix"}, "PR Feedback": {"General PR suggestions": "..., `xxx`...", "Code suggestions": [{"relevant file": "xxx.py", "suggestion content": "xxx [important]"}, {"suggestion number": 2, "relevant file": "yyy.p'
expected_output = { expected_output = {
'PR Analysis': { 'PR Analysis': {
'Main theme': 'xxx', 'Main theme': 'xxx',
@ -81,7 +78,6 @@ class TestTryFixJson:
'General PR suggestions': '..., `xxx`...', 'General PR suggestions': '..., `xxx`...',
'Code suggestions': [ 'Code suggestions': [
{ {
'suggestion number': 1,
'relevant file': 'xxx.py', 'relevant file': 'xxx.py',
'suggestion content': 'xxx [important]' 'suggestion content': 'xxx [important]'
} }

View File

@ -41,14 +41,6 @@ class TestParseCodeSuggestion:
expected_output = "\n" # modified to expect a newline character expected_output = "\n" # modified to expect a newline character
assert parse_code_suggestion(input_data) == expected_output assert parse_code_suggestion(input_data) == expected_output
# Tests that function returns correct output when 'suggestion number' key has a non-integer value
def test_non_integer_suggestion_number(self):
input_data = {
"Suggestion number": "one",
"Description": "This is a suggestion"
}
expected_output = " **Description:** This is a suggestion\n\n"
assert parse_code_suggestion(input_data) == expected_output
# Tests that function returns correct output when 'before' or 'after' key has a non-string value # Tests that function returns correct output when 'before' or 'after' key has a non-string value
def test_non_string_before_or_after(self): def test_non_string_before_or_after(self):
@ -64,7 +56,6 @@ class TestParseCodeSuggestion:
# Tests that function returns correct output when input dictionary does not have 'code example' key # Tests that function returns correct output when input dictionary does not have 'code example' key
def test_no_code_example_key(self): def test_no_code_example_key(self):
code_suggestions = { code_suggestions = {
'suggestion number': 1,
'suggestion': 'Suggestion 1', 'suggestion': 'Suggestion 1',
'description': 'Description 1', 'description': 'Description 1',
'before': 'Before 1', 'before': 'Before 1',
@ -76,7 +67,6 @@ class TestParseCodeSuggestion:
# Tests that function returns correct output when input dictionary has 'code example' key # Tests that function returns correct output when input dictionary has 'code example' key
def test_with_code_example_key(self): def test_with_code_example_key(self):
code_suggestions = { code_suggestions = {
'suggestion number': 2,
'suggestion': 'Suggestion 2', 'suggestion': 'Suggestion 2',
'description': 'Description 2', 'description': 'Description 2',
'code example': { 'code example': {